1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 // ignore-lexer-test FIXME #15679
13 //! Unicode string manipulation (`str` type)
17 //! Rust's string type is one of the core primitive types of the language. While
18 //! represented by the name `str`, the name `str` is not actually a valid type in
//! Rust. A string must always be accessed through a pointer type. `String` is used
20 //! for an owned string, so there is only one commonly-used `str` type in Rust:
23 //! `&str` is the borrowed string type. This type of string can only be created
24 //! from other strings, unless it is a static string (see below). As the word
25 //! "borrowed" implies, this type of string is owned elsewhere, and this string
26 //! cannot be moved out of.
28 //! As an example, here's some code that uses a string.
32 //! let borrowed_string = "This string is borrowed with the 'static lifetime";
36 //! From the example above, you can guess that Rust's string literals have the
37 //! `'static` lifetime. This is akin to C's concept of a static string.
38 //! More precisely, string literals are immutable views with a 'static lifetime
39 //! (otherwise known as the lifetime of the entire program), and thus have the
40 //! type `&'static str`.
44 //! Rust's string type, `str`, is a sequence of Unicode scalar values encoded as a
45 //! stream of UTF-8 bytes. All [strings](../../reference.html#literals) are
46 //! guaranteed to be validly encoded UTF-8 sequences. Additionally, strings are
47 //! not null-terminated and can thus contain null bytes.
//! The actual representation of strings has a direct mapping to slices: `&str`
50 //! is the same as `&[u8]`.
52 #![doc(primitive = "str")]
53 #![stable(feature = "rust1", since = "1.0.0")]
55 use self::RecompositionState::*;
56 use self::DecompositionType::*;
58 use core::char::CharExt;
59 use core::clone::Clone;
60 use core::iter::AdditiveIterator;
61 use core::iter::{Iterator, IteratorExt};
63 use core::ops::RangeFull;
64 use core::option::Option::{self, Some, None};
65 use core::result::Result;
66 use core::slice::AsSlice;
67 use core::str as core_str;
68 use unicode::str::{UnicodeStr, Utf16Encoder};
70 use vec_deque::VecDeque;
71 use borrow::{Borrow, ToOwned};
76 use slice::SliceConcatExt;
78 pub use core::str::{FromStr, Utf8Error, Str};
79 pub use core::str::{Lines, LinesAny, MatchIndices, SplitStr, CharRange};
80 pub use core::str::{Split, SplitTerminator};
81 pub use core::str::{SplitN, RSplitN};
82 pub use core::str::{from_utf8, CharEq, Chars, CharIndices, Bytes};
83 pub use core::str::{from_utf8_unchecked, from_c_str, ParseBoolError};
84 pub use unicode::str::{Words, Graphemes, GraphemeIndices};
87 Section: Creating a string
// Joining of `Str`-like slices into owned `String`s.
// NOTE(review): this extraction elides lines; both method bodies below are
// fragmentary (loop headers and closing braces are not visible).
impl<S: Str> SliceConcatExt<str, String> for [S] {
// Concatenates every element back-to-back with no separator.
fn concat(&self) -> String {
let s = self.as_slice();
// `len` calculation may overflow, but `push_str` will check boundaries
let len = s.iter().map(|s| s.as_slice().len()).sum();
// Pre-size the buffer so the `push_str` calls below do not reallocate.
let mut result = String::with_capacity(len);
result.push_str(s.as_slice())
// Concatenates the elements with `sep` inserted between each adjacent pair.
fn connect(&self, sep: &str) -> String {
let s = self.as_slice();
// Empty input: nothing to join.
return String::new();
// this is wrong without the guarantee that `self` is non-empty
// (`s.len() - 1` would underflow); the early return above provides it.
// `len` calculation may overflow, but `push_str` will check boundaries
let len = sep.len() * (s.len() - 1)
+ s.iter().map(|s| s.as_slice().len()).sum();
let mut result = String::with_capacity(len);
// `first` suppresses the separator before the very first element.
let mut first = true;
result.push_str(sep);
result.push_str(s.as_slice());
// Helper functions used for Unicode normalization
// Sorts a run of (character, canonical combining class) pairs into canonical
// order: adjacent pairs whose classes are both non-zero and out of order are
// swapped until a full pass performs no swap (a bubble sort). Class-0
// (starter) characters are never moved.
// NOTE(review): the swap itself and the loop headers are among the elided lines.
fn canonical_sort(comb: &mut [(char, u8)]) {
let len = comb.len();
let mut swapped = false;
let class_a = comb[j-1].1;
let class_b = comb[j].1;
// Only reorder when both entries are combining marks (class != 0) and the
// left one outranks the right one.
if class_a != 0 && class_b != 0 && class_a > class_b {
// A pass with no swaps means the slice is in canonical order.
if !swapped { break; }
// Selects which Unicode decomposition `Decompositions::next` performs:
// canonical (`decompose_canonical`) or compatibility (`decompose_compatible`).
enum DecompositionType {
/// External iterator for a string's decomposition's characters.
/// Use with the `std::iter` module.
#[unstable(feature = "collections")]
pub struct Decompositions<'a> {
// Which decomposition (canonical or compatibility) to apply.
kind: DecompositionType,
// Decomposed characters not yet yielded, each paired with its canonical
// combining class (0 for starters).
buffer: Vec<(char, u8)>,
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for Decompositions<'a> {
// Yields the next decomposed character, making sure the pending run in
// `buffer` is in canonical order before anything is handed out.
fn next(&mut self) -> Option<char> {
// First drain anything already buffered and known to be sorted.
match self.buffer.first() {
self.buffer.remove(0);
Some(&(c, _)) if self.sorted => {
self.buffer.remove(0);
_ => self.sorted = false
// Buffer exhausted (or not yet sorted): pull more characters from the
// underlying iterator and decompose them into `buffer`.
for ch in self.iter.by_ref() {
let buffer = &mut self.buffer;
let sorted = &mut self.sorted;
unicode::char::canonical_combining_class(d);
// A starter (class 0) terminates the current combining sequence,
// so the run buffered so far can be put into canonical order now.
if class == 0 && !*sorted {
canonical_sort(buffer);
buffer.push((d, class));
// `kind` chooses canonical (NFD) vs. compatibility (NFKD) decomposition.
unicode::char::decompose_canonical(ch, callback)
unicode::char::decompose_compatible(ch, callback)
// End of input: sort whatever remains before yielding it.
canonical_sort(&mut self.buffer);
if self.buffer.is_empty() {
match self.buffer.remove(0) {
// The lower bound of the source iterator is reused: decomposition never
// shrinks the character count.
fn size_hint(&self) -> (usize, Option<usize>) {
let (lower, _) = self.iter.size_hint();
// State machine for `Recompositions::next`: composing new output, purging
// buffered combining marks, or finished draining.
enum RecompositionState {
/// External iterator for a string's recomposition's characters.
/// Use with the `std::iter` module.
#[unstable(feature = "collections")]
pub struct Recompositions<'a> {
// Source of canonically decomposed characters to recompose.
iter: Decompositions<'a>,
// Current phase of the recomposition state machine.
state: RecompositionState,
// Combining marks that could not (yet) be composed into `composee`.
buffer: VecDeque<char>,
// The starter we are currently trying to compose following marks onto.
composee: Option<char>,
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for Recompositions<'a> {
// Canonical composition (the second half of NFC/NFKC): tries to pair the
// current starter (`composee`) with each following character via
// `unicode::char::compose`, buffering combining marks that are blocked.
fn next(&mut self) -> Option<char> {
for ch in self.iter.by_ref() {
let ch_class = unicode::char::canonical_combining_class(ch);
// No pending starter yet: adopt `ch` as the composition candidate.
if self.composee.is_none() {
self.composee = Some(ch);
let k = self.composee.clone().unwrap();
// `last_ccc` holds the combining class of the previously seen mark;
// it decides whether `ch` is blocked from composing with `k`.
match self.last_ccc {
match unicode::char::compose(k, ch) {
// Composition succeeded: the pair fuses into `r`.
self.composee = Some(r);
self.composee = Some(ch);
self.buffer.push_back(ch);
self.last_ccc = Some(ch_class);
// An intervening mark of equal or higher class blocks `ch`
// from reaching the starter (per the canonical composition
// blocking rule — see UAX #15).
if l_class >= ch_class {
// `ch` is blocked from `composee`
self.composee = Some(ch);
self.last_ccc = None;
// Flush everything buffered before composing again.
self.state = Purging;
self.buffer.push_back(ch);
self.last_ccc = Some(ch_class);
match unicode::char::compose(k, ch) {
self.composee = Some(r);
self.buffer.push_back(ch);
self.last_ccc = Some(ch_class);
// Input exhausted: emit the pending starter, then drain the buffer.
self.state = Finished;
if self.composee.is_some() {
return self.composee.take();
// Purging: hand out buffered marks until empty, then resume composing.
match self.buffer.pop_front() {
None => self.state = Composing,
// Finished: drain the buffer, then yield the final composee.
match self.buffer.pop_front() {
None => return self.composee.take(),
/// External iterator for a string's UTF16 codeunits.
/// Use with the `std::iter` module.
#[unstable(feature = "collections")]
pub struct Utf16Units<'a> {
// Adapter that re-encodes the underlying `char` stream as UTF-16 code units.
encoder: Utf16Encoder<Chars<'a>>
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for Utf16Units<'a> {
// Both methods delegate directly to the wrapped `Utf16Encoder`.
fn next(&mut self) -> Option<u16> { self.encoder.next() }
fn size_hint(&self) -> (usize, Option<usize>) { self.encoder.size_hint() }
// Return the initial codepoint accumulator for the first byte.
// The first byte is special, only want bottom 5 bits for width 2, 4 bits
// for width 3, and 3 bits for width 4
macro_rules! utf8_first_byte {
// `0x7F >> $width` builds a mask keeping exactly the payload bits of a
// leading byte of the given sequence width.
($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
// return the value of $ch updated with continuation byte $byte
macro_rules! utf8_acc_cont_byte {
// Shift the accumulator left 6 bits and append the byte's low 6 payload bits.
($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
#[stable(feature = "rust1", since = "1.0.0")]
impl Borrow<str> for String {
// A `String` can always be viewed as the `&str` spanning its whole buffer.
fn borrow(&self) -> &str { &self[..] }
#[stable(feature = "rust1", since = "1.0.0")]
impl ToOwned for str {
// Copies the slice's bytes into a freshly allocated `String`.
fn to_owned(&self) -> String {
// SAFETY: the bytes come from a valid `&str`, so they are valid UTF-8.
// NOTE(review): the enclosing `unsafe` block is among the elided lines.
String::from_utf8_unchecked(self.as_bytes().to_owned())
409 Section: Trait implementations
412 /// Any string that can be represented as a slice.
413 #[stable(feature = "rust1", since = "1.0.0")]
414 pub trait StrExt: Index<RangeFull, Output = str> {
415 /// Escapes each char in `s` with `char::escape_default`.
416 #[unstable(feature = "collections",
417 reason = "return type may change to be an iterator")]
418 fn escape_default(&self) -> String {
419 self.chars().flat_map(|c| c.escape_default()).collect()
422 /// Escapes each char in `s` with `char::escape_unicode`.
423 #[unstable(feature = "collections",
424 reason = "return type may change to be an iterator")]
425 fn escape_unicode(&self) -> String {
426 self.chars().flat_map(|c| c.escape_unicode()).collect()
429 /// Replaces all occurrences of one string with another.
433 /// * `from` - The string to replace
434 /// * `to` - The replacement string
438 /// The original string with all occurrences of `from` replaced with `to`.
443 /// let s = "this is old";
445 /// assert_eq!(s.replace("old", "new"), "this is new");
447 /// // not found, so no change.
448 /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
450 #[stable(feature = "rust1", since = "1.0.0")]
451 fn replace(&self, from: &str, to: &str) -> String {
452 let mut result = String::new();
453 let mut last_end = 0;
454 for (start, end) in self.match_indices(from) {
455 result.push_str(unsafe { self.slice_unchecked(last_end, start) });
459 result.push_str(unsafe { self.slice_unchecked(last_end, self.len()) });
463 /// Returns an iterator over the string in Unicode Normalization Form D
464 /// (canonical decomposition).
466 #[unstable(feature = "collections",
467 reason = "this functionality may be moved to libunicode")]
468 fn nfd_chars(&self) -> Decompositions {
470 iter: self[..].chars(),
477 /// Returns an iterator over the string in Unicode Normalization Form KD
478 /// (compatibility decomposition).
480 #[unstable(feature = "collections",
481 reason = "this functionality may be moved to libunicode")]
482 fn nfkd_chars(&self) -> Decompositions {
484 iter: self[..].chars(),
491 /// An Iterator over the string in Unicode Normalization Form C
492 /// (canonical decomposition followed by canonical composition).
494 #[unstable(feature = "collections",
495 reason = "this functionality may be moved to libunicode")]
496 fn nfc_chars(&self) -> Recompositions {
498 iter: self.nfd_chars(),
500 buffer: VecDeque::new(),
506 /// An Iterator over the string in Unicode Normalization Form KC
507 /// (compatibility decomposition followed by canonical composition).
509 #[unstable(feature = "collections",
510 reason = "this functionality may be moved to libunicode")]
511 fn nfkc_chars(&self) -> Recompositions {
513 iter: self.nfkd_chars(),
515 buffer: VecDeque::new(),
521 /// Returns true if a string contains a string pattern.
525 /// - pat - The string pattern to look for
530 /// assert!("bananas".contains("nana"));
532 #[stable(feature = "rust1", since = "1.0.0")]
533 fn contains(&self, pat: &str) -> bool {
534 core_str::StrExt::contains(&self[..], pat)
537 /// Returns true if a string contains a char pattern.
541 /// - pat - The char pattern to look for
546 /// assert!("hello".contains_char('e'));
548 #[unstable(feature = "collections",
549 reason = "might get removed in favour of a more generic contains()")]
550 fn contains_char<P: CharEq>(&self, pat: P) -> bool {
551 core_str::StrExt::contains_char(&self[..], pat)
554 /// An iterator over the characters of `self`. Note, this iterates
555 /// over Unicode code-points, not Unicode graphemes.
560 /// let v: Vec<char> = "abc åäö".chars().collect();
561 /// assert_eq!(v, vec!['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
563 #[stable(feature = "rust1", since = "1.0.0")]
564 fn chars(&self) -> Chars {
565 core_str::StrExt::chars(&self[..])
568 /// An iterator over the bytes of `self`
573 /// let v: Vec<u8> = "bors".bytes().collect();
574 /// assert_eq!(v, b"bors".to_vec());
576 #[stable(feature = "rust1", since = "1.0.0")]
577 fn bytes(&self) -> Bytes {
578 core_str::StrExt::bytes(&self[..])
581 /// An iterator over the characters of `self` and their byte offsets.
582 #[stable(feature = "rust1", since = "1.0.0")]
583 fn char_indices(&self) -> CharIndices {
584 core_str::StrExt::char_indices(&self[..])
587 /// An iterator over substrings of `self`, separated by characters
588 /// matched by the pattern `pat`.
593 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect();
594 /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
596 /// let v: Vec<&str> = "abc1def2ghi".split(|c: char| c.is_numeric()).collect();
597 /// assert_eq!(v, vec!["abc", "def", "ghi"]);
599 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
600 /// assert_eq!(v, vec!["lion", "", "tiger", "leopard"]);
602 /// let v: Vec<&str> = "".split('X').collect();
603 /// assert_eq!(v, vec![""]);
605 #[stable(feature = "rust1", since = "1.0.0")]
606 fn split<P: CharEq>(&self, pat: P) -> Split<P> {
607 core_str::StrExt::split(&self[..], pat)
610 /// An iterator over substrings of `self`, separated by characters
611 /// matched by the pattern `pat`, restricted to splitting at most `count`
617 /// let v: Vec<&str> = "Mary had a little lambda".splitn(2, ' ').collect();
618 /// assert_eq!(v, vec!["Mary", "had", "a little lambda"]);
620 /// let v: Vec<&str> = "abc1def2ghi".splitn(1, |c: char| c.is_numeric()).collect();
621 /// assert_eq!(v, vec!["abc", "def2ghi"]);
623 /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(2, 'X').collect();
624 /// assert_eq!(v, vec!["lion", "", "tigerXleopard"]);
626 /// let v: Vec<&str> = "abcXdef".splitn(0, 'X').collect();
627 /// assert_eq!(v, vec!["abcXdef"]);
629 /// let v: Vec<&str> = "".splitn(1, 'X').collect();
630 /// assert_eq!(v, vec![""]);
632 #[stable(feature = "rust1", since = "1.0.0")]
633 fn splitn<P: CharEq>(&self, count: usize, pat: P) -> SplitN<P> {
634 core_str::StrExt::splitn(&self[..], count, pat)
637 /// An iterator over substrings of `self`, separated by characters
638 /// matched by the pattern `pat`.
640 /// Equivalent to `split`, except that the trailing substring
641 /// is skipped if empty (terminator semantics).
646 /// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
647 /// assert_eq!(v, vec!["A", "B"]);
649 /// let v: Vec<&str> = "A..B..".split_terminator('.').collect();
650 /// assert_eq!(v, vec!["A", "", "B", ""]);
652 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').rev().collect();
653 /// assert_eq!(v, vec!["lamb", "little", "a", "had", "Mary"]);
655 /// let v: Vec<&str> = "abc1def2ghi".split(|c: char| c.is_numeric()).rev().collect();
656 /// assert_eq!(v, vec!["ghi", "def", "abc"]);
658 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').rev().collect();
659 /// assert_eq!(v, vec!["leopard", "tiger", "", "lion"]);
661 #[unstable(feature = "collections", reason = "might get removed")]
662 fn split_terminator<P: CharEq>(&self, pat: P) -> SplitTerminator<P> {
663 core_str::StrExt::split_terminator(&self[..], pat)
666 /// An iterator over substrings of `self`, separated by characters
667 /// matched by the pattern `pat`, starting from the end of the string.
668 /// Restricted to splitting at most `count` times.
673 /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(2, ' ').collect();
674 /// assert_eq!(v, vec!["lamb", "little", "Mary had a"]);
676 /// let v: Vec<&str> = "abc1def2ghi".rsplitn(1, |c: char| c.is_numeric()).collect();
677 /// assert_eq!(v, vec!["ghi", "abc1def"]);
679 /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(2, 'X').collect();
680 /// assert_eq!(v, vec!["leopard", "tiger", "lionX"]);
682 #[stable(feature = "rust1", since = "1.0.0")]
683 fn rsplitn<P: CharEq>(&self, count: usize, pat: P) -> RSplitN<P> {
684 core_str::StrExt::rsplitn(&self[..], count, pat)
687 /// An iterator over the start and end indices of the disjoint
688 /// matches of the pattern `pat` within `self`.
690 /// That is, each returned value `(start, end)` satisfies
691 /// `self.slice(start, end) == sep`. For matches of `sep` within
692 /// `self` that overlap, only the indices corresponding to the
693 /// first match are returned.
698 /// let v: Vec<(usize, usize)> = "abcXXXabcYYYabc".match_indices("abc").collect();
699 /// assert_eq!(v, vec![(0,3), (6,9), (12,15)]);
701 /// let v: Vec<(usize, usize)> = "1abcabc2".match_indices("abc").collect();
702 /// assert_eq!(v, vec![(1,4), (4,7)]);
704 /// let v: Vec<(usize, usize)> = "ababa".match_indices("aba").collect();
705 /// assert_eq!(v, vec![(0, 3)]); // only the first `aba`
707 #[unstable(feature = "collections",
708 reason = "might have its iterator type changed")]
709 fn match_indices<'a>(&'a self, pat: &'a str) -> MatchIndices<'a> {
710 core_str::StrExt::match_indices(&self[..], pat)
713 /// An iterator over the substrings of `self` separated by the pattern `sep`.
718 /// let v: Vec<&str> = "abcXXXabcYYYabc".split_str("abc").collect();
719 /// assert_eq!(v, vec!["", "XXX", "YYY", ""]);
721 /// let v: Vec<&str> = "1abcabc2".split_str("abc").collect();
722 /// assert_eq!(v, vec!["1", "", "2"]);
724 #[unstable(feature = "collections",
725 reason = "might get removed in the future in favor of a more generic split()")]
726 fn split_str<'a>(&'a self, pat: &'a str) -> SplitStr<'a> {
727 core_str::StrExt::split_str(&self[..], pat)
730 /// An iterator over the lines of a string (subsequences separated
731 /// by `\n`). This does not include the empty string after a
737 /// let four_lines = "foo\nbar\n\nbaz\n";
738 /// let v: Vec<&str> = four_lines.lines().collect();
739 /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
741 #[stable(feature = "rust1", since = "1.0.0")]
742 fn lines(&self) -> Lines {
743 core_str::StrExt::lines(&self[..])
746 /// An iterator over the lines of a string, separated by either
747 /// `\n` or `\r\n`. As with `.lines()`, this does not include an
748 /// empty trailing line.
753 /// let four_lines = "foo\r\nbar\n\r\nbaz\n";
754 /// let v: Vec<&str> = four_lines.lines_any().collect();
755 /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
757 #[stable(feature = "rust1", since = "1.0.0")]
758 fn lines_any(&self) -> LinesAny {
759 core_str::StrExt::lines_any(&self[..])
762 /// Deprecated: use `s[a .. b]` instead.
763 #[unstable(feature = "collections",
764 reason = "use slice notation [a..b] instead")]
765 #[deprecated(since = "1.0.0", reason = "use slice notation [a..b] instead")]
766 fn slice(&self, begin: usize, end: usize) -> &str;
768 /// Deprecated: use `s[a..]` instead.
769 #[unstable(feature = "collections",
770 reason = "use slice notation [a..b] instead")]
771 #[deprecated(since = "1.0.0", reason = "use slice notation [a..] instead")]
772 fn slice_from(&self, begin: usize) -> &str;
774 /// Deprecated: use `s[..a]` instead.
775 #[unstable(feature = "collections",
776 reason = "use slice notation [a..b] instead")]
777 #[deprecated(since = "1.0.0", reason = "use slice notation [..a] instead")]
778 fn slice_to(&self, end: usize) -> &str;
780 /// Returns a slice of the string from the character range
781 /// [`begin`..`end`).
783 /// That is, start at the `begin`-th code point of the string and
784 /// continue to the `end`-th code point. This does not detect or
785 /// handle edge cases such as leaving a combining character as the
786 /// first code point of the string.
788 /// Due to the design of UTF-8, this operation is `O(end)`.
789 /// See `slice`, `slice_to` and `slice_from` for `O(1)`
790 /// variants that use byte indices rather than code point
793 /// Panics if `begin` > `end` or the either `begin` or `end` are
794 /// beyond the last character of the string.
799 /// let s = "Löwe 老虎 Léopard";
800 /// assert_eq!(s.slice_chars(0, 4), "Löwe");
801 /// assert_eq!(s.slice_chars(5, 7), "老虎");
803 #[unstable(feature = "collections",
804 reason = "may have yet to prove its worth")]
805 fn slice_chars(&self, begin: usize, end: usize) -> &str {
806 core_str::StrExt::slice_chars(&self[..], begin, end)
809 /// Takes a bytewise (not UTF-8) slice from a string.
811 /// Returns the substring from [`begin`..`end`).
813 /// Caller must check both UTF-8 character boundaries and the boundaries of
814 /// the entire slice as well.
815 #[stable(feature = "rust1", since = "1.0.0")]
816 unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str {
817 core_str::StrExt::slice_unchecked(&self[..], begin, end)
820 /// Returns true if the pattern `pat` is a prefix of the string.
825 /// assert!("banana".starts_with("ba"));
827 #[stable(feature = "rust1", since = "1.0.0")]
828 fn starts_with(&self, pat: &str) -> bool {
829 core_str::StrExt::starts_with(&self[..], pat)
832 /// Returns true if the pattern `pat` is a suffix of the string.
837 /// assert!("banana".ends_with("nana"));
839 #[stable(feature = "rust1", since = "1.0.0")]
840 fn ends_with(&self, pat: &str) -> bool {
841 core_str::StrExt::ends_with(&self[..], pat)
844 /// Returns a string with all pre- and suffixes that match
845 /// the pattern `pat` repeatedly removed.
849 /// * pat - a string pattern
854 /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
855 /// let x: &[_] = &['1', '2'];
856 /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar");
857 /// assert_eq!("123foo1bar123".trim_matches(|c: char| c.is_numeric()), "foo1bar");
859 #[stable(feature = "rust1", since = "1.0.0")]
860 fn trim_matches<P: CharEq>(&self, pat: P) -> &str {
861 core_str::StrExt::trim_matches(&self[..], pat)
864 /// Returns a string with all prefixes that match
865 /// the pattern `pat` repeatedly removed.
869 /// * pat - a string pattern
874 /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
875 /// let x: &[_] = &['1', '2'];
876 /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12");
877 /// assert_eq!("123foo1bar123".trim_left_matches(|c: char| c.is_numeric()), "foo1bar123");
879 #[stable(feature = "rust1", since = "1.0.0")]
880 fn trim_left_matches<P: CharEq>(&self, pat: P) -> &str {
881 core_str::StrExt::trim_left_matches(&self[..], pat)
884 /// Returns a string with all suffixes that match
885 /// the pattern `pat` repeatedly removed.
889 /// * pat - a string pattern
894 /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
895 /// let x: &[_] = &['1', '2'];
896 /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar");
897 /// assert_eq!("123foo1bar123".trim_right_matches(|c: char| c.is_numeric()), "123foo1bar");
899 #[stable(feature = "rust1", since = "1.0.0")]
900 fn trim_right_matches<P: CharEq>(&self, pat: P) -> &str {
901 core_str::StrExt::trim_right_matches(&self[..], pat)
904 /// Check that `index`-th byte lies at the start and/or end of a
905 /// UTF-8 code point sequence.
907 /// The start and end of the string (when `index == self.len()`)
908 /// are considered to be boundaries.
910 /// Panics if `index` is greater than `self.len()`.
915 /// let s = "Löwe 老虎 Léopard";
916 /// assert!(s.is_char_boundary(0));
918 /// assert!(s.is_char_boundary(6));
919 /// assert!(s.is_char_boundary(s.len()));
921 /// // second byte of `ö`
922 /// assert!(!s.is_char_boundary(2));
924 /// // third byte of `老`
925 /// assert!(!s.is_char_boundary(8));
927 #[unstable(feature = "collections",
928 reason = "naming is uncertain with container conventions")]
929 fn is_char_boundary(&self, index: usize) -> bool {
930 core_str::StrExt::is_char_boundary(&self[..], index)
933 /// Pluck a character out of a string and return the index of the next
936 /// This function can be used to iterate over the Unicode characters of a
941 /// This example manually iterates through the characters of a
942 /// string; this should normally be done by `.chars()` or
946 /// use std::str::CharRange;
948 /// let s = "中华Việt Nam";
950 /// while i < s.len() {
951 /// let CharRange {ch, next} = s.char_range_at(i);
952 /// println!("{}: {}", i, ch);
975 /// * i - The byte offset of the char to extract
979 /// A record {ch: char, next: usize} containing the char value and the byte
980 /// index of the next Unicode character.
984 /// If `i` is greater than or equal to the length of the string.
985 /// If `i` is not the index of the beginning of a valid UTF-8 character.
986 #[unstable(feature = "collections",
987 reason = "naming is uncertain with container conventions")]
988 fn char_range_at(&self, start: usize) -> CharRange {
989 core_str::StrExt::char_range_at(&self[..], start)
992 /// Given a byte position and a str, return the previous char and its position.
994 /// This function can be used to iterate over a Unicode string in reverse.
996 /// Returns 0 for next index if called on start index 0.
1000 /// If `i` is greater than the length of the string.
1001 /// If `i` is not an index following a valid UTF-8 character.
1002 #[unstable(feature = "collections",
1003 reason = "naming is uncertain with container conventions")]
1004 fn char_range_at_reverse(&self, start: usize) -> CharRange {
1005 core_str::StrExt::char_range_at_reverse(&self[..], start)
1008 /// Plucks the character starting at the `i`th byte of a string.
1014 /// assert_eq!(s.char_at(1), 'b');
1015 /// assert_eq!(s.char_at(2), 'π');
1016 /// assert_eq!(s.char_at(4), 'c');
1021 /// If `i` is greater than or equal to the length of the string.
1022 /// If `i` is not the index of the beginning of a valid UTF-8 character.
1023 #[unstable(feature = "collections",
1024 reason = "naming is uncertain with container conventions")]
1025 fn char_at(&self, i: usize) -> char {
1026 core_str::StrExt::char_at(&self[..], i)
1029 /// Plucks the character ending at the `i`th byte of a string.
1033 /// If `i` is greater than the length of the string.
1034 /// If `i` is not an index following a valid UTF-8 character.
1035 #[unstable(feature = "collections",
1036 reason = "naming is uncertain with container conventions")]
1037 fn char_at_reverse(&self, i: usize) -> char {
1038 core_str::StrExt::char_at_reverse(&self[..], i)
1041 /// Work with the byte buffer of a string as a byte slice.
1046 /// assert_eq!("bors".as_bytes(), b"bors");
1048 #[stable(feature = "rust1", since = "1.0.0")]
1049 fn as_bytes(&self) -> &[u8] {
1050 core_str::StrExt::as_bytes(&self[..])
1053 /// Returns the byte index of the first character of `self` that
1054 /// matches the pattern `pat`.
1058 /// `Some` containing the byte index of the last matching character
1059 /// or `None` if there is no match
1064 /// let s = "Löwe 老虎 Léopard";
1066 /// assert_eq!(s.find('L'), Some(0));
1067 /// assert_eq!(s.find('é'), Some(14));
1069 /// // the first space
1070 /// assert_eq!(s.find(|c: char| c.is_whitespace()), Some(5));
1072 /// // neither are found
1073 /// let x: &[_] = &['1', '2'];
1074 /// assert_eq!(s.find(x), None);
1076 #[stable(feature = "rust1", since = "1.0.0")]
1077 fn find<P: CharEq>(&self, pat: P) -> Option<usize> {
1078 core_str::StrExt::find(&self[..], pat)
1081 /// Returns the byte index of the last character of `self` that
1082 /// matches the pattern `pat`.
1086 /// `Some` containing the byte index of the last matching character
1087 /// or `None` if there is no match.
1092 /// let s = "Löwe 老虎 Léopard";
1094 /// assert_eq!(s.rfind('L'), Some(13));
1095 /// assert_eq!(s.rfind('é'), Some(14));
1097 /// // the second space
1098 /// assert_eq!(s.rfind(|c: char| c.is_whitespace()), Some(12));
1100 /// // searches for an occurrence of either `1` or `2`, but neither are found
1101 /// let x: &[_] = &['1', '2'];
1102 /// assert_eq!(s.rfind(x), None);
1104 #[stable(feature = "rust1", since = "1.0.0")]
1105 fn rfind<P: CharEq>(&self, pat: P) -> Option<usize> {
1106 core_str::StrExt::rfind(&self[..], pat)
1109 /// Returns the byte index of the first matching substring
1113 /// * `needle` - The string to search for
1117 /// `Some` containing the byte index of the first matching substring
1118 /// or `None` if there is no match.
1123 /// let s = "Löwe 老虎 Léopard";
1125 /// assert_eq!(s.find_str("老虎 L"), Some(6));
1126 /// assert_eq!(s.find_str("muffin man"), None);
1128 #[unstable(feature = "collections",
1129 reason = "might get removed in favor of a more generic find in the future")]
1130 fn find_str(&self, needle: &str) -> Option<usize> {
1131 core_str::StrExt::find_str(&self[..], needle)
1134 /// Retrieves the first character from a string slice and returns
1135 /// it. This does not allocate a new string; instead, it returns a
1136 /// slice that point one character beyond the character that was
1137 /// shifted. If the string does not contain any characters,
1138 /// None is returned instead.
1143 /// let s = "Löwe 老虎 Léopard";
1144 /// let (c, s1) = s.slice_shift_char().unwrap();
1145 /// assert_eq!(c, 'L');
1146 /// assert_eq!(s1, "öwe 老虎 Léopard");
1148 /// let (c, s2) = s1.slice_shift_char().unwrap();
1149 /// assert_eq!(c, 'ö');
1150 /// assert_eq!(s2, "we 老虎 Léopard");
1152 #[unstable(feature = "collections",
1153 reason = "awaiting conventions about shifting and slices")]
1154 fn slice_shift_char(&self) -> Option<(char, &str)> {
1155 core_str::StrExt::slice_shift_char(&self[..])
1158 /// Returns the byte offset of an inner slice relative to an enclosing outer slice.
1160 /// Panics if `inner` is not a direct slice contained within self.
1165 /// let string = "a\nb\nc";
1166 /// let lines: Vec<&str> = string.lines().collect();
1168 /// assert!(string.subslice_offset(lines[0]) == 0); // &"a"
1169 /// assert!(string.subslice_offset(lines[1]) == 2); // &"b"
1170 /// assert!(string.subslice_offset(lines[2]) == 4); // &"c"
1172 #[unstable(feature = "collections",
1173 reason = "awaiting convention about comparability of arbitrary slices")]
1174 fn subslice_offset(&self, inner: &str) -> usize {
1175 core_str::StrExt::subslice_offset(&self[..], inner)
1178 /// Return an unsafe pointer to the strings buffer.
1180 /// The caller must ensure that the string outlives this pointer,
1181 /// and that it is not reallocated (e.g. by pushing to the
1183 #[stable(feature = "rust1", since = "1.0.0")]
1185 fn as_ptr(&self) -> *const u8 {
1186 core_str::StrExt::as_ptr(&self[..])
1189 /// Return an iterator of `u16` over the string encoded as UTF-16.
1190 #[unstable(feature = "collections",
1191 reason = "this functionality may only be provided by libunicode")]
1192 fn utf16_units(&self) -> Utf16Units {
1193 Utf16Units { encoder: Utf16Encoder::new(self[..].chars()) }
1196 /// Return the number of bytes in this string
1201 /// assert_eq!("foo".len(), 3);
1202 /// assert_eq!("ƒoo".len(), 4);
1204 #[stable(feature = "rust1", since = "1.0.0")]
1206 fn len(&self) -> usize {
1207 core_str::StrExt::len(&self[..])
1210 /// Returns true if this slice contains no bytes
1215 /// assert!("".is_empty());
1218 #[stable(feature = "rust1", since = "1.0.0")]
1219 fn is_empty(&self) -> bool {
1220 core_str::StrExt::is_empty(&self[..])
1223 /// Parse this string into the specified type.
1228 /// assert_eq!("4".parse::<u32>(), Ok(4));
1229 /// assert!("j".parse::<u32>().is_err());
1232 #[stable(feature = "rust1", since = "1.0.0")]
1233 fn parse<F: FromStr>(&self) -> Result<F, F::Err> {
1234 core_str::StrExt::parse(&self[..])
1237 /// Returns an iterator over the
1238 /// [grapheme clusters](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
1241 /// If `is_extended` is true, the iterator is over the *extended grapheme clusters*;
1242 /// otherwise, the iterator is over the *legacy grapheme clusters*.
1243 /// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
1244 /// recommends extended grapheme cluster boundaries for general processing.
1249 /// let gr1 = "a\u{310}e\u{301}o\u{308}\u{332}".graphemes(true).collect::<Vec<&str>>();
1250 /// let b: &[_] = &["a\u{310}", "e\u{301}", "o\u{308}\u{332}"];
1251 /// assert_eq!(gr1.as_slice(), b);
1252 /// let gr2 = "a\r\nb🇷🇺🇸🇹".graphemes(true).collect::<Vec<&str>>();
1253 /// let b: &[_] = &["a", "\r\n", "b", "🇷🇺🇸🇹"];
1254 /// assert_eq!(gr2.as_slice(), b);
1256 #[unstable(feature = "collections",
1257 reason = "this functionality may only be provided by libunicode")]
1258 fn graphemes(&self, is_extended: bool) -> Graphemes {
1259 UnicodeStr::graphemes(&self[..], is_extended)
1262 /// Returns an iterator over the grapheme clusters of self and their byte offsets.
1263 /// See `graphemes()` method for more information.
1268 /// let gr_inds = "a̐éö̲\r\n".grapheme_indices(true).collect::<Vec<(usize, &str)>>();
1269 /// let b: &[_] = &[(0, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
1270 /// assert_eq!(gr_inds.as_slice(), b);
1272 #[unstable(feature = "collections",
1273 reason = "this functionality may only be provided by libunicode")]
1274 fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices {
1275 UnicodeStr::grapheme_indices(&self[..], is_extended)
1278 /// An iterator over the words of a string (subsequences separated
1279 /// by any sequence of whitespace). Sequences of whitespace are
1280 /// collapsed, so empty "words" are not included.
1285 /// let some_words = " Mary had\ta little \n\t lamb";
1286 /// let v: Vec<&str> = some_words.words().collect();
1287 /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
1289 #[unstable(feature = "str_words",
1290 reason = "the precise algorithm to use is unclear")]
1291 fn words(&self) -> Words {
1292 UnicodeStr::words(&self[..])
1295 /// Returns a string's displayed width in columns, treating control
1296 /// characters as zero-width.
1298 /// `is_cjk` determines behavior for characters in the Ambiguous category:
1299 /// if `is_cjk` is `true`, these are 2 columns wide; otherwise, they are 1.
1300 /// In CJK locales, `is_cjk` should be `true`, else it should be `false`.
1301 /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
1302 /// recommends that these characters be treated as 1 column (i.e.,
1303 /// `is_cjk` = `false`) if the locale is unknown.
1304 #[unstable(feature = "collections",
1305 reason = "this functionality may only be provided by libunicode")]
1306 fn width(&self, is_cjk: bool) -> usize {
1307 UnicodeStr::width(&self[..], is_cjk)
// NOTE(review): fragment — trims Unicode whitespace on both ends via
// `UnicodeStr::trim` (not just ASCII space). Closing `}` lost to listing gap.
1310 /// Returns a string with leading and trailing whitespace removed.
1311 #[stable(feature = "rust1", since = "1.0.0")]
1312 fn trim(&self) -> &str {
1313 UnicodeStr::trim(&self[..])
// NOTE(review): fragment — leading-whitespace trim, delegating to
// `UnicodeStr::trim_left`. Closing `}` lost to listing gap.
1316 /// Returns a string with leading whitespace removed.
1317 #[stable(feature = "rust1", since = "1.0.0")]
1318 fn trim_left(&self) -> &str {
1319 UnicodeStr::trim_left(&self[..])
// NOTE(review): fragment — trailing-whitespace trim, delegating to
// `UnicodeStr::trim_right`. Closing `}` lost to listing gap.
1322 /// Returns a string with trailing whitespace removed.
1323 #[stable(feature = "rust1", since = "1.0.0")]
1324 fn trim_right(&self) -> &str {
1325 UnicodeStr::trim_right(&self[..])
1329 #[stable(feature = "rust1", since = "1.0.0")]
1330 impl StrExt for str {
1331 fn slice(&self, begin: usize, end: usize) -> &str {
1335 fn slice_from(&self, begin: usize) -> &str {
1339 fn slice_to(&self, end: usize) -> &str {
1348 use core::iter::AdditiveIterator;
1349 use super::from_utf8;
1350 use super::Utf8Error;
1355 assert!("" <= "foo");
1356 assert!("foo" <= "foo");
1357 assert!("foo" != "bar");
1362 assert_eq!("".len(), 0);
1363 assert_eq!("hello world".len(), 11);
1364 assert_eq!("\x63".len(), 1);
1365 assert_eq!("\u{a2}".len(), 2);
1366 assert_eq!("\u{3c0}".len(), 2);
1367 assert_eq!("\u{2620}".len(), 3);
1368 assert_eq!("\u{1d11e}".len(), 4);
1370 assert_eq!("".chars().count(), 0);
1371 assert_eq!("hello world".chars().count(), 11);
1372 assert_eq!("\x63".chars().count(), 1);
1373 assert_eq!("\u{a2}".chars().count(), 1);
1374 assert_eq!("\u{3c0}".chars().count(), 1);
1375 assert_eq!("\u{2620}".chars().count(), 1);
1376 assert_eq!("\u{1d11e}".chars().count(), 1);
1377 assert_eq!("ประเทศไทย中华Việt Nam".chars().count(), 19);
1379 assert_eq!("hello".width(false), 10);
1380 assert_eq!("hello".width(true), 10);
1381 assert_eq!("\0\0\0\0\0".width(false), 0);
1382 assert_eq!("\0\0\0\0\0".width(true), 0);
1383 assert_eq!("".width(false), 0);
1384 assert_eq!("".width(true), 0);
1385 assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(false), 4);
1386 assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(true), 8);
1391 assert_eq!("hello".find('l'), Some(2));
1392 assert_eq!("hello".find(|c:char| c == 'o'), Some(4));
1393 assert!("hello".find('x').is_none());
1394 assert!("hello".find(|c:char| c == 'x').is_none());
1395 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30));
1396 assert_eq!("ประเทศไทย中华Việt Nam".find(|c: char| c == '华'), Some(30));
1401 assert_eq!("hello".rfind('l'), Some(3));
1402 assert_eq!("hello".rfind(|c:char| c == 'o'), Some(4));
1403 assert!("hello".rfind('x').is_none());
1404 assert!("hello".rfind(|c:char| c == 'x').is_none());
1405 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30));
1406 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|c: char| c == '华'), Some(30));
1411 let empty = String::from_str("");
1412 let s: String = empty.chars().collect();
1413 assert_eq!(empty, s);
1414 let data = String::from_str("ประเทศไทย中");
1415 let s: String = data.chars().collect();
1416 assert_eq!(data, s);
1420 fn test_into_bytes() {
1421 let data = String::from_str("asdf");
1422 let buf = data.into_bytes();
1423 assert_eq!(b"asdf", buf);
1427 fn test_find_str() {
1429 assert_eq!("".find_str(""), Some(0));
1430 assert!("banana".find_str("apple pie").is_none());
1432 let data = "abcabc";
1433 assert_eq!(data[0..6].find_str("ab"), Some(0));
1434 assert_eq!(data[2..6].find_str("ab"), Some(3 - 2));
1435 assert!(data[2..4].find_str("ab").is_none());
1437 let string = "ประเทศไทย中华Việt Nam";
1438 let mut data = String::from_str(string);
1439 data.push_str(string);
1440 assert!(data.find_str("ไท华").is_none());
1441 assert_eq!(data[0..43].find_str(""), Some(0));
1442 assert_eq!(data[6..43].find_str(""), Some(6 - 6));
1444 assert_eq!(data[0..43].find_str("ประ"), Some( 0));
1445 assert_eq!(data[0..43].find_str("ทศไ"), Some(12));
1446 assert_eq!(data[0..43].find_str("ย中"), Some(24));
1447 assert_eq!(data[0..43].find_str("iệt"), Some(34));
1448 assert_eq!(data[0..43].find_str("Nam"), Some(40));
1450 assert_eq!(data[43..86].find_str("ประ"), Some(43 - 43));
1451 assert_eq!(data[43..86].find_str("ทศไ"), Some(55 - 43));
1452 assert_eq!(data[43..86].find_str("ย中"), Some(67 - 43));
1453 assert_eq!(data[43..86].find_str("iệt"), Some(77 - 43));
1454 assert_eq!(data[43..86].find_str("Nam"), Some(83 - 43));
1458 fn test_slice_chars() {
1459 fn t(a: &str, b: &str, start: usize) {
1460 assert_eq!(a.slice_chars(start, start + b.chars().count()), b);
1463 t("hello", "llo", 2);
1464 t("hello", "el", 1);
1467 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
1470 fn s(x: &str) -> String { x.to_string() }
1472 macro_rules! test_concat {
1473 ($expected: expr, $string: expr) => {
1475 let s: String = $string.concat();
1476 assert_eq!($expected, s);
1482 fn test_concat_for_different_types() {
1483 test_concat!("ab", vec![s("a"), s("b")]);
1484 test_concat!("ab", vec!["a", "b"]);
1485 test_concat!("ab", vec!["a", "b"]);
1486 test_concat!("ab", vec![s("a"), s("b")]);
1490 fn test_concat_for_different_lengths() {
1491 let empty: &[&str] = &[];
1492 test_concat!("", empty);
1493 test_concat!("a", ["a"]);
1494 test_concat!("ab", ["a", "b"]);
1495 test_concat!("abc", ["", "a", "bc"]);
1498 macro_rules! test_connect {
1499 ($expected: expr, $string: expr, $delim: expr) => {
1501 let s = $string.connect($delim);
1502 assert_eq!($expected, s);
1508 fn test_connect_for_different_types() {
1509 test_connect!("a-b", ["a", "b"], "-");
1510 let hyphen = "-".to_string();
1511 test_connect!("a-b", [s("a"), s("b")], &*hyphen);
1512 test_connect!("a-b", vec!["a", "b"], &*hyphen);
1513 test_connect!("a-b", &*vec!["a", "b"], "-");
1514 test_connect!("a-b", vec![s("a"), s("b")], "-");
1518 fn test_connect_for_different_lengths() {
1519 let empty: &[&str] = &[];
1520 test_connect!("", empty, "-");
1521 test_connect!("a", ["a"], "-");
1522 test_connect!("a-b", ["a", "b"], "-");
1523 test_connect!("-a-bc", ["", "a", "bc"], "-");
1527 fn test_unsafe_slice() {
1528 assert_eq!("ab", unsafe {"abc".slice_unchecked(0, 2)});
1529 assert_eq!("bc", unsafe {"abc".slice_unchecked(1, 3)});
1530 assert_eq!("", unsafe {"abc".slice_unchecked(1, 1)});
1531 fn a_million_letter_a() -> String {
1533 let mut rs = String::new();
1535 rs.push_str("aaaaaaaaaa");
1540 fn half_a_million_letter_a() -> String {
1542 let mut rs = String::new();
1544 rs.push_str("aaaaa");
1549 let letters = a_million_letter_a();
1550 assert!(half_a_million_letter_a() ==
1551 unsafe {String::from_str(letters.slice_unchecked(
1557 fn test_starts_with() {
1558 assert!(("".starts_with("")));
1559 assert!(("abc".starts_with("")));
1560 assert!(("abc".starts_with("a")));
1561 assert!((!"a".starts_with("abc")));
1562 assert!((!"".starts_with("abc")));
1563 assert!((!"ödd".starts_with("-")));
1564 assert!(("ödd".starts_with("öd")));
1568 fn test_ends_with() {
1569 assert!(("".ends_with("")));
1570 assert!(("abc".ends_with("")));
1571 assert!(("abc".ends_with("c")));
1572 assert!((!"a".ends_with("abc")));
1573 assert!((!"".ends_with("abc")));
1574 assert!((!"ddö".ends_with("-")));
1575 assert!(("ddö".ends_with("dö")));
1579 fn test_is_empty() {
1580 assert!("".is_empty());
1581 assert!(!"a".is_empty());
1587 assert_eq!("".replace(a, "b"), String::from_str(""));
1588 assert_eq!("a".replace(a, "b"), String::from_str("b"));
1589 assert_eq!("ab".replace(a, "b"), String::from_str("bb"));
1591 assert!(" test test ".replace(test, "toast") ==
1592 String::from_str(" toast toast "));
1593 assert_eq!(" test test ".replace(test, ""), String::from_str(" "));
1597 fn test_replace_2a() {
1598 let data = "ประเทศไทย中华";
1599 let repl = "دولة الكويت";
1602 let a2 = "دولة الكويتทศไทย中华";
1603 assert_eq!(data.replace(a, repl), a2);
1607 fn test_replace_2b() {
1608 let data = "ประเทศไทย中华";
1609 let repl = "دولة الكويت";
1612 let b2 = "ปรدولة الكويتทศไทย中华";
1613 assert_eq!(data.replace(b, repl), b2);
1617 fn test_replace_2c() {
1618 let data = "ประเทศไทย中华";
1619 let repl = "دولة الكويت";
1622 let c2 = "ประเทศไทยدولة الكويت";
1623 assert_eq!(data.replace(c, repl), c2);
1627 fn test_replace_2d() {
1628 let data = "ประเทศไทย中华";
1629 let repl = "دولة الكويت";
1632 assert_eq!(data.replace(d, repl), data);
1637 assert_eq!("ab", "abc".slice(0, 2));
1638 assert_eq!("bc", "abc".slice(1, 3));
1639 assert_eq!("", "abc".slice(1, 1));
1640 assert_eq!("\u{65e5}", "\u{65e5}\u{672c}".slice(0, 3));
1642 let data = "ประเทศไทย中华";
1643 assert_eq!("ป", data.slice(0, 3));
1644 assert_eq!("ร", data.slice(3, 6));
1645 assert_eq!("", data.slice(3, 3));
1646 assert_eq!("华", data.slice(30, 33));
1648 fn a_million_letter_x() -> String {
1650 let mut rs = String::new();
1652 rs.push_str("华华华华华华华华华华");
1657 fn half_a_million_letter_x() -> String {
1659 let mut rs = String::new();
1661 rs.push_str("华华华华华");
1666 let letters = a_million_letter_x();
1667 assert!(half_a_million_letter_x() ==
1668 String::from_str(letters.slice(0, 3 * 500000)));
1673 let ss = "中华Việt Nam";
1675 assert_eq!("华", ss.slice(3, 6));
1676 assert_eq!("Việt Nam", ss.slice(6, 16));
1678 assert_eq!("ab", "abc".slice(0, 2));
1679 assert_eq!("bc", "abc".slice(1, 3));
1680 assert_eq!("", "abc".slice(1, 1));
1682 assert_eq!("中", ss.slice(0, 3));
1683 assert_eq!("华V", ss.slice(3, 7));
1684 assert_eq!("", ss.slice(3, 3));
1699 fn test_slice_fail() {
1700 "中华Việt Nam".slice(0, 2);
1704 fn test_slice_from() {
1705 assert_eq!("abcd".slice_from(0), "abcd");
1706 assert_eq!("abcd".slice_from(2), "cd");
1707 assert_eq!("abcd".slice_from(4), "");
1710 fn test_slice_to() {
1711 assert_eq!("abcd".slice_to(0), "");
1712 assert_eq!("abcd".slice_to(2), "ab");
1713 assert_eq!("abcd".slice_to(4), "abcd");
// NOTE(review): test fragment — the `#[test]` attribute and closing `}` fall in
// a listing gap (numbers jump 1715→1717, 1728→1732). Covers trim_left_matches
// with: an empty char set (no-op), a char-slice set, a single char, and a
// closure predicate — trimming stops at the first non-matching char.
1717 fn test_trim_left_matches() {
1718 let v: &[char] = &[];
// Empty match set: nothing is trimmed.
1719 assert_eq!(" *** foo *** ".trim_left_matches(v), " *** foo *** ");
1720 let chars: &[char] = &['*', ' '];
1721 assert_eq!(" *** foo *** ".trim_left_matches(chars), "foo *** ");
// A string made entirely of matching chars trims to empty.
1722 assert_eq!(" *** *** ".trim_left_matches(chars), "");
1723 assert_eq!("foo *** ".trim_left_matches(chars), "foo *** ");
1725 assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
1726 let chars: &[char] = &['1', '2'];
1727 assert_eq!("12foo1bar12".trim_left_matches(chars), "foo1bar12");
// Closure form: any numeric char matches.
1728 assert_eq!("123foo1bar123".trim_left_matches(|c: char| c.is_numeric()), "foo1bar123");
// NOTE(review): test fragment — `#[test]` attribute and closing `}` lost to a
// listing gap. Mirror of test_trim_left_matches for the trailing-edge variant:
// empty set, char-slice set, single char, and closure predicate.
1732 fn test_trim_right_matches() {
1733 let v: &[char] = &[];
// Empty match set: nothing is trimmed.
1734 assert_eq!(" *** foo *** ".trim_right_matches(v), " *** foo *** ");
1735 let chars: &[char] = &['*', ' '];
1736 assert_eq!(" *** foo *** ".trim_right_matches(chars), " *** foo");
// A string made entirely of matching chars trims to empty.
1737 assert_eq!(" *** *** ".trim_right_matches(chars), "");
1738 assert_eq!(" *** foo".trim_right_matches(chars), " *** foo");
1740 assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
1741 let chars: &[char] = &['1', '2'];
1742 assert_eq!("12foo1bar12".trim_right_matches(chars), "12foo1bar");
// Closure form: any numeric char matches.
1743 assert_eq!("123foo1bar123".trim_right_matches(|c: char| c.is_numeric()), "123foo1bar");
// NOTE(review): test fragment — `#[test]` attribute and closing `}` lost to a
// listing gap. Both-ends variant: same four matcher shapes as the one-sided
// tests (empty set, char slice, single char, closure); interior matches stay.
1747 fn test_trim_matches() {
1748 let v: &[char] = &[];
// Empty match set: nothing is trimmed.
1749 assert_eq!(" *** foo *** ".trim_matches(v), " *** foo *** ");
1750 let chars: &[char] = &['*', ' '];
1751 assert_eq!(" *** foo *** ".trim_matches(chars), "foo");
1752 assert_eq!(" *** *** ".trim_matches(chars), "");
1753 assert_eq!("foo".trim_matches(chars), "foo");
// Interior '1' between foo/bar is not removed — trimming is edges-only.
1755 assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
1756 let chars: &[char] = &['1', '2'];
1757 assert_eq!("12foo1bar12".trim_matches(chars), "foo1bar");
1758 assert_eq!("123foo1bar123".trim_matches(|c: char| c.is_numeric()), "foo1bar");
// NOTE(review): test fragment — `#[test]` attribute and closing `}` lost to a
// listing gap. Checks trim_left on empty/non-whitespace/all-whitespace inputs,
// and that U+3000 (ideographic space) counts as whitespace while trailing
// whitespace is preserved.
1762 fn test_trim_left() {
1763 assert_eq!("".trim_left(), "");
1764 assert_eq!("a".trim_left(), "a");
1765 assert_eq!(" ".trim_left(), "");
1766 assert_eq!(" blah".trim_left(), "blah");
// \u{3000} is IDEOGRAPHIC SPACE — Unicode whitespace, must be trimmed too.
1767 assert_eq!(" \u{3000} wut".trim_left(), "wut");
// Trailing space untouched by the left-only trim.
1768 assert_eq!("hey ".trim_left(), "hey ");
// NOTE(review): test fragment — `#[test]` attribute and closing `}` lost to a
// listing gap. Mirror of test_trim_left for the trailing edge, including the
// U+3000 ideographic-space case and preservation of leading whitespace.
1772 fn test_trim_right() {
1773 assert_eq!("".trim_right(), "");
1774 assert_eq!("a".trim_right(), "a");
1775 assert_eq!(" ".trim_right(), "");
1776 assert_eq!("blah ".trim_right(), "blah");
// \u{3000} is IDEOGRAPHIC SPACE — Unicode whitespace, must be trimmed too.
1777 assert_eq!("wut \u{3000} ".trim_right(), "wut");
// Leading space untouched by the right-only trim.
1778 assert_eq!(" hey".trim_right(), " hey");
1783 assert_eq!("".trim(), "");
1784 assert_eq!("a".trim(), "a");
1785 assert_eq!(" ".trim(), "");
1786 assert_eq!(" blah ".trim(), "blah");
1787 assert_eq!("\nwut \u{3000} ".trim(), "wut");
1788 assert_eq!(" hey dude ".trim(), "hey dude");
1792 fn test_is_whitespace() {
1793 assert!("".chars().all(|c| c.is_whitespace()));
1794 assert!(" ".chars().all(|c| c.is_whitespace()));
1795 assert!("\u{2009}".chars().all(|c| c.is_whitespace())); // Thin space
1796 assert!(" \n\t ".chars().all(|c| c.is_whitespace()));
1797 assert!(!" _ ".chars().all(|c| c.is_whitespace()));
1801 fn test_slice_shift_char() {
1802 let data = "ประเทศไทย中";
1803 assert_eq!(data.slice_shift_char(), Some(('ป', "ระเทศไทย中")));
1807 fn test_slice_shift_char_2() {
1809 assert_eq!(empty.slice_shift_char(), None);
// NOTE(review): interior of a UTF-8 validity test — the enclosing `fn` header
// and closing brace fall outside this listing's gaps. Three groups:
// overlong encodings (rejected), surrogate code points (rejected), and
// shortest-form boundary sequences (accepted).
1814 // deny overlong encodings
// C0 80 / E0 80 80 / F0 82 82 AC etc. re-encode small code points in too many
// bytes; valid UTF-8 requires the shortest form, so all must error.
1815 assert!(from_utf8(&[0xc0, 0x80]).is_err());
1816 assert!(from_utf8(&[0xc0, 0xae]).is_err());
1817 assert!(from_utf8(&[0xe0, 0x80, 0x80]).is_err());
1818 assert!(from_utf8(&[0xe0, 0x80, 0xaf]).is_err());
1819 assert!(from_utf8(&[0xe0, 0x81, 0x81]).is_err());
1820 assert!(from_utf8(&[0xf0, 0x82, 0x82, 0xac]).is_err());
// F4 90 80 80 would be U+110000 — past the Unicode maximum U+10FFFF.
1821 assert!(from_utf8(&[0xf4, 0x90, 0x80, 0x80]).is_err());
// ED A0 80 / ED BF BF encode U+D800 / U+DFFF — surrogates, invalid in UTF-8.
1824 assert!(from_utf8(&[0xED, 0xA0, 0x80]).is_err());
1825 assert!(from_utf8(&[0xED, 0xBF, 0xBF]).is_err());
// Shortest-form boundary values for each sequence length — all valid:
// U+0080, U+07FF, U+0800, U+D7FF (last before surrogates), U+E000 (first
// after), U+FFFF, U+10000, U+10FFFF.
1827 assert!(from_utf8(&[0xC2, 0x80]).is_ok());
1828 assert!(from_utf8(&[0xDF, 0xBF]).is_ok());
1829 assert!(from_utf8(&[0xE0, 0xA0, 0x80]).is_ok());
1830 assert!(from_utf8(&[0xED, 0x9F, 0xBF]).is_ok());
1831 assert!(from_utf8(&[0xEE, 0x80, 0x80]).is_ok());
1832 assert!(from_utf8(&[0xEF, 0xBF, 0xBF]).is_ok());
1833 assert!(from_utf8(&[0xF0, 0x90, 0x80, 0x80]).is_ok());
1834 assert!(from_utf8(&[0xF4, 0x8F, 0xBF, 0xBF]).is_ok());
1838 fn test_is_utf16() {
1839 use unicode::str::is_utf16;
1841 ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } }
1850 // surrogate pairs (randomly generated with Python 3's
1851 // .encode('utf-16be'))
1852 pos!(&[0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
1853 &[0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
1854 &[0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);
1856 // mixtures (also random)
1857 pos!(&[0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
1858 &[0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
1859 &[0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);
1863 ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } }
1867 // surrogate + regular unit
1869 // surrogate + lead surrogate
1871 // unterminated surrogate
1873 // trail surrogate without a lead
1876 // random byte sequences that Python 3's .decode('utf-16be')
1878 neg!(&[0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
1879 &[0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
1880 &[0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
1881 &[0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
1882 &[0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
1883 &[0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
1884 &[0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
1885 &[0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
1886 &[0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
1887 &[0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
1888 &[0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
1889 &[0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
1890 &[0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
1891 &[0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
1892 &[0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
1893 &[0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
1894 &[0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
1895 &[0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
1896 &[0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
1897 &[0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
1898 &[0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
1902 fn test_as_bytes() {
1905 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1906 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1910 assert_eq!("".as_bytes(), b);
1911 assert_eq!("abc".as_bytes(), b"abc");
1912 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
1917 fn test_as_bytes_fail() {
1918 // Don't double free. (I'm not sure if this exercises the
1919 // original problem code path anymore.)
1920 let s = String::from_str("");
1921 let _bytes = s.as_bytes();
1927 let buf = "hello".as_ptr();
1929 assert_eq!(*buf.offset(0), b'h');
1930 assert_eq!(*buf.offset(1), b'e');
1931 assert_eq!(*buf.offset(2), b'l');
1932 assert_eq!(*buf.offset(3), b'l');
1933 assert_eq!(*buf.offset(4), b'o');
1938 fn test_subslice_offset() {
1939 let a = "kernelsprite";
1940 let b = &a[7..a.len()];
1941 let c = &a[0..a.len() - 6];
1942 assert_eq!(a.subslice_offset(b), 7);
1943 assert_eq!(a.subslice_offset(c), 0);
1945 let string = "a\nb\nc";
1946 let lines: Vec<&str> = string.lines().collect();
1947 assert_eq!(string.subslice_offset(lines[0]), 0);
1948 assert_eq!(string.subslice_offset(lines[1]), 2);
1949 assert_eq!(string.subslice_offset(lines[2]), 4);
1954 fn test_subslice_offset_2() {
1955 let a = "alchemiter";
1956 let b = "cruxtruder";
1957 a.subslice_offset(b);
1961 fn vec_str_conversions() {
1962 let s1: String = String::from_str("All mimsy were the borogoves");
1964 let v: Vec<u8> = s1.as_bytes().to_vec();
1965 let s2: String = String::from_str(from_utf8(&v).unwrap());
1971 let a: u8 = s1.as_bytes()[i];
1972 let b: u8 = s2.as_bytes()[i];
// NOTE(review): test fragment — `#[test]` attribute and closing `}` lost to a
// listing gap. Substring containment: prefixes, suffixes, the empty needle
// (always contained), negatives, and multi-byte Thai/CJK needles where byte
// adjacency must not produce a false match ("ไท华" spans two real substrings).
1981 fn test_contains() {
1982 assert!("abcde".contains("bcd"));
1983 assert!("abcde".contains("abcd"));
1984 assert!("abcde".contains("bcde"));
// Empty needle is contained in every string, including the empty string.
1985 assert!("abcde".contains(""));
1986 assert!("".contains(""));
1987 assert!(!"abcde".contains("def"));
1988 assert!(!"".contains("a"));
1990 let data = "ประเทศไทย中华Việt Nam";
1991 assert!(data.contains("ประเ"));
1992 assert!(data.contains("ะเ"));
1993 assert!(data.contains("中华"));
// "ไท" and "华" are not adjacent in `data`, so this must be false.
1994 assert!(!data.contains("ไท华"));
1998 fn test_contains_char() {
1999 assert!("abc".contains_char('b'));
2000 assert!("a".contains_char('a'));
2001 assert!(!"abc".contains_char('d'));
2002 assert!(!"".contains_char('a'));
2007 let s = "ศไทย中华Việt Nam";
2008 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2011 assert!(s.char_at(pos) == *ch);
2012 pos += ch.to_string().len();
2017 fn test_char_at_reverse() {
2018 let s = "ศไทย中华Việt Nam";
2019 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2020 let mut pos = s.len();
2021 for ch in v.iter().rev() {
2022 assert!(s.char_at_reverse(pos) == *ch);
2023 pos -= ch.to_string().len();
2028 fn test_escape_unicode() {
2029 assert_eq!("abc".escape_unicode(),
2030 String::from_str("\\u{61}\\u{62}\\u{63}"));
2031 assert_eq!("a c".escape_unicode(),
2032 String::from_str("\\u{61}\\u{20}\\u{63}"));
2033 assert_eq!("\r\n\t".escape_unicode(),
2034 String::from_str("\\u{d}\\u{a}\\u{9}"));
2035 assert_eq!("'\"\\".escape_unicode(),
2036 String::from_str("\\u{27}\\u{22}\\u{5c}"));
2037 assert_eq!("\x00\x01\u{fe}\u{ff}".escape_unicode(),
2038 String::from_str("\\u{0}\\u{1}\\u{fe}\\u{ff}"));
2039 assert_eq!("\u{100}\u{ffff}".escape_unicode(),
2040 String::from_str("\\u{100}\\u{ffff}"));
2041 assert_eq!("\u{10000}\u{10ffff}".escape_unicode(),
2042 String::from_str("\\u{10000}\\u{10ffff}"));
2043 assert_eq!("ab\u{fb00}".escape_unicode(),
2044 String::from_str("\\u{61}\\u{62}\\u{fb00}"));
2045 assert_eq!("\u{1d4ea}\r".escape_unicode(),
2046 String::from_str("\\u{1d4ea}\\u{d}"));
2050 fn test_escape_default() {
2051 assert_eq!("abc".escape_default(), String::from_str("abc"));
2052 assert_eq!("a c".escape_default(), String::from_str("a c"));
2053 assert_eq!("\r\n\t".escape_default(), String::from_str("\\r\\n\\t"));
2054 assert_eq!("'\"\\".escape_default(), String::from_str("\\'\\\"\\\\"));
2055 assert_eq!("\u{100}\u{ffff}".escape_default(),
2056 String::from_str("\\u{100}\\u{ffff}"));
2057 assert_eq!("\u{10000}\u{10ffff}".escape_default(),
2058 String::from_str("\\u{10000}\\u{10ffff}"));
2059 assert_eq!("ab\u{fb00}".escape_default(),
2060 String::from_str("ab\\u{fb00}"));
2061 assert_eq!("\u{1d4ea}\r".escape_default(),
2062 String::from_str("\\u{1d4ea}\\r"));
2066 fn test_total_ord() {
2067 "1234".cmp("123") == Greater;
2068 "123".cmp("1234") == Less;
2069 "1234".cmp("1234") == Equal;
2070 "12345555".cmp("123456") == Less;
2071 "22".cmp("1234") == Greater;
2075 fn test_char_range_at() {
2076 let data = "b¢€𤭢𤭢€¢b";
2077 assert_eq!('b', data.char_range_at(0).ch);
2078 assert_eq!('¢', data.char_range_at(1).ch);
2079 assert_eq!('€', data.char_range_at(3).ch);
2080 assert_eq!('𤭢', data.char_range_at(6).ch);
2081 assert_eq!('𤭢', data.char_range_at(10).ch);
2082 assert_eq!('€', data.char_range_at(14).ch);
2083 assert_eq!('¢', data.char_range_at(17).ch);
2084 assert_eq!('b', data.char_range_at(19).ch);
2088 fn test_char_range_at_reverse_underflow() {
2089 assert_eq!("abc".char_range_at_reverse(0).next, 0);
2093 fn test_iterator() {
2094 let s = "ศไทย中华Việt Nam";
2095 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2101 assert_eq!(c, v[pos]);
2104 assert_eq!(pos, v.len());
2108 fn test_rev_iterator() {
2109 let s = "ศไทย中华Việt Nam";
2110 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2113 let it = s.chars().rev();
2116 assert_eq!(c, v[pos]);
2119 assert_eq!(pos, v.len());
2123 fn test_chars_decoding() {
2124 let mut bytes = [0u8; 4];
2125 for c in (0u32..0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2126 let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2127 let s = ::core::str::from_utf8(&bytes[..len]).unwrap();
2128 if Some(c) != s.chars().next() {
2129 panic!("character {:x}={} does not decode correctly", c as u32, c);
2135 fn test_chars_rev_decoding() {
2136 let mut bytes = [0u8; 4];
2137 for c in (0u32..0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2138 let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2139 let s = ::core::str::from_utf8(&bytes[..len]).unwrap();
2140 if Some(c) != s.chars().rev().next() {
2141 panic!("character {:x}={} does not decode correctly", c as u32, c);
2147 fn test_iterator_clone() {
2148 let s = "ศไทย中华Việt Nam";
2149 let mut it = s.chars();
2151 assert!(it.clone().zip(it).all(|(x,y)| x == y));
2155 fn test_bytesator() {
2156 let s = "ศไทย中华Việt Nam";
2158 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2159 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2164 for b in s.bytes() {
2165 assert_eq!(b, v[pos]);
2171 fn test_bytes_revator() {
2172 let s = "ศไทย中华Việt Nam";
2174 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2175 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2178 let mut pos = v.len();
2180 for b in s.bytes().rev() {
2182 assert_eq!(b, v[pos]);
2187 fn test_char_indicesator() {
2188 let s = "ศไทย中华Việt Nam";
2189 let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
2190 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2193 let it = s.char_indices();
2196 assert_eq!(c, (p[pos], v[pos]));
2199 assert_eq!(pos, v.len());
2200 assert_eq!(pos, p.len());
2204 fn test_char_indices_revator() {
2205 let s = "ศไทย中华Việt Nam";
2206 let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
2207 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2210 let it = s.char_indices().rev();
2213 assert_eq!(c, (p[pos], v[pos]));
2216 assert_eq!(pos, v.len());
2217 assert_eq!(pos, p.len());
2221 fn test_splitn_char_iterator() {
2222 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2224 let split: Vec<&str> = data.splitn(3, ' ').collect();
2225 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2227 let split: Vec<&str> = data.splitn(3, |c: char| c == ' ').collect();
2228 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2231 let split: Vec<&str> = data.splitn(3, 'ä').collect();
2232 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
2234 let split: Vec<&str> = data.splitn(3, |c: char| c == 'ä').collect();
2235 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
2239 fn test_split_char_iterator_no_trailing() {
2240 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2242 let split: Vec<&str> = data.split('\n').collect();
2243 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
2245 let split: Vec<&str> = data.split_terminator('\n').collect();
2246 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
2251 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
2252 let words: Vec<&str> = data.words().collect();
2253 assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
2257 fn test_nfd_chars() {
2259 ($input: expr, $expected: expr) => {
2260 assert_eq!($input.nfd_chars().collect::<String>(), $expected);
2264 t!("\u{1e0b}\u{1c4}", "d\u{307}\u{1c4}");
2265 t!("\u{2026}", "\u{2026}");
2266 t!("\u{2126}", "\u{3a9}");
2267 t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
2268 t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
2269 t!("a\u{301}", "a\u{301}");
2270 t!("\u{301}a", "\u{301}a");
2271 t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
2272 t!("\u{ac1c}", "\u{1100}\u{1162}");
2276 fn test_nfkd_chars() {
2278 ($input: expr, $expected: expr) => {
2279 assert_eq!($input.nfkd_chars().collect::<String>(), $expected);
2283 t!("\u{1e0b}\u{1c4}", "d\u{307}DZ\u{30c}");
2284 t!("\u{2026}", "...");
2285 t!("\u{2126}", "\u{3a9}");
2286 t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
2287 t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
2288 t!("a\u{301}", "a\u{301}");
2289 t!("\u{301}a", "\u{301}a");
2290 t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
2291 t!("\u{ac1c}", "\u{1100}\u{1162}");
2295 fn test_nfc_chars() {
2297 ($input: expr, $expected: expr) => {
2298 assert_eq!($input.nfc_chars().collect::<String>(), $expected);
2302 t!("\u{1e0b}\u{1c4}", "\u{1e0b}\u{1c4}");
2303 t!("\u{2026}", "\u{2026}");
2304 t!("\u{2126}", "\u{3a9}");
2305 t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
2306 t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
2307 t!("a\u{301}", "\u{e1}");
2308 t!("\u{301}a", "\u{301}a");
2309 t!("\u{d4db}", "\u{d4db}");
2310 t!("\u{ac1c}", "\u{ac1c}");
2311 t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
2315 fn test_nfkc_chars() {
2317 ($input: expr, $expected: expr) => {
2318 assert_eq!($input.nfkc_chars().collect::<String>(), $expected);
2322 t!("\u{1e0b}\u{1c4}", "\u{1e0b}D\u{17d}");
2323 t!("\u{2026}", "...");
2324 t!("\u{2126}", "\u{3a9}");
2325 t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
2326 t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
2327 t!("a\u{301}", "\u{e1}");
2328 t!("\u{301}a", "\u{301}a");
2329 t!("\u{d4db}", "\u{d4db}");
2330 t!("\u{ac1c}", "\u{ac1c}");
2331 t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
2336 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
2337 let lines: Vec<&str> = data.lines().collect();
2338 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2340 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
2341 let lines: Vec<&str> = data.lines().collect();
2342 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2346 fn test_graphemes() {
2347 use core::iter::order;
2348 // official Unicode test data
2349 // from http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
2350 let test_same: [(_, &[_]); 325] = [
2351 ("\u{20}\u{20}", &["\u{20}", "\u{20}"]),
2352 ("\u{20}\u{308}\u{20}", &["\u{20}\u{308}", "\u{20}"]),
2353 ("\u{20}\u{D}", &["\u{20}", "\u{D}"]),
2354 ("\u{20}\u{308}\u{D}", &["\u{20}\u{308}", "\u{D}"]),
2355 ("\u{20}\u{A}", &["\u{20}", "\u{A}"]),
2356 ("\u{20}\u{308}\u{A}", &["\u{20}\u{308}", "\u{A}"]),
2357 ("\u{20}\u{1}", &["\u{20}", "\u{1}"]),
2358 ("\u{20}\u{308}\u{1}", &["\u{20}\u{308}", "\u{1}"]),
2359 ("\u{20}\u{300}", &["\u{20}\u{300}"]),
2360 ("\u{20}\u{308}\u{300}", &["\u{20}\u{308}\u{300}"]),
2361 ("\u{20}\u{1100}", &["\u{20}", "\u{1100}"]),
2362 ("\u{20}\u{308}\u{1100}", &["\u{20}\u{308}", "\u{1100}"]),
2363 ("\u{20}\u{1160}", &["\u{20}", "\u{1160}"]),
2364 ("\u{20}\u{308}\u{1160}", &["\u{20}\u{308}", "\u{1160}"]),
2365 ("\u{20}\u{11A8}", &["\u{20}", "\u{11A8}"]),
2366 ("\u{20}\u{308}\u{11A8}", &["\u{20}\u{308}", "\u{11A8}"]),
2367 ("\u{20}\u{AC00}", &["\u{20}", "\u{AC00}"]),
2368 ("\u{20}\u{308}\u{AC00}", &["\u{20}\u{308}", "\u{AC00}"]),
2369 ("\u{20}\u{AC01}", &["\u{20}", "\u{AC01}"]),
2370 ("\u{20}\u{308}\u{AC01}", &["\u{20}\u{308}", "\u{AC01}"]),
2371 ("\u{20}\u{1F1E6}", &["\u{20}", "\u{1F1E6}"]),
2372 ("\u{20}\u{308}\u{1F1E6}", &["\u{20}\u{308}", "\u{1F1E6}"]),
2373 ("\u{20}\u{378}", &["\u{20}", "\u{378}"]),
2374 ("\u{20}\u{308}\u{378}", &["\u{20}\u{308}", "\u{378}"]),
2375 ("\u{D}\u{20}", &["\u{D}", "\u{20}"]),
2376 ("\u{D}\u{308}\u{20}", &["\u{D}", "\u{308}", "\u{20}"]),
2377 ("\u{D}\u{D}", &["\u{D}", "\u{D}"]),
2378 ("\u{D}\u{308}\u{D}", &["\u{D}", "\u{308}", "\u{D}"]),
2379 ("\u{D}\u{A}", &["\u{D}\u{A}"]),
2380 ("\u{D}\u{308}\u{A}", &["\u{D}", "\u{308}", "\u{A}"]),
2381 ("\u{D}\u{1}", &["\u{D}", "\u{1}"]),
2382 ("\u{D}\u{308}\u{1}", &["\u{D}", "\u{308}", "\u{1}"]),
2383 ("\u{D}\u{300}", &["\u{D}", "\u{300}"]),
2384 ("\u{D}\u{308}\u{300}", &["\u{D}", "\u{308}\u{300}"]),
2385 ("\u{D}\u{903}", &["\u{D}", "\u{903}"]),
2386 ("\u{D}\u{1100}", &["\u{D}", "\u{1100}"]),
2387 ("\u{D}\u{308}\u{1100}", &["\u{D}", "\u{308}", "\u{1100}"]),
2388 ("\u{D}\u{1160}", &["\u{D}", "\u{1160}"]),
2389 ("\u{D}\u{308}\u{1160}", &["\u{D}", "\u{308}", "\u{1160}"]),
2390 ("\u{D}\u{11A8}", &["\u{D}", "\u{11A8}"]),
2391 ("\u{D}\u{308}\u{11A8}", &["\u{D}", "\u{308}", "\u{11A8}"]),
2392 ("\u{D}\u{AC00}", &["\u{D}", "\u{AC00}"]),
2393 ("\u{D}\u{308}\u{AC00}", &["\u{D}", "\u{308}", "\u{AC00}"]),
2394 ("\u{D}\u{AC01}", &["\u{D}", "\u{AC01}"]),
2395 ("\u{D}\u{308}\u{AC01}", &["\u{D}", "\u{308}", "\u{AC01}"]),
2396 ("\u{D}\u{1F1E6}", &["\u{D}", "\u{1F1E6}"]),
2397 ("\u{D}\u{308}\u{1F1E6}", &["\u{D}", "\u{308}", "\u{1F1E6}"]),
2398 ("\u{D}\u{378}", &["\u{D}", "\u{378}"]),
2399 ("\u{D}\u{308}\u{378}", &["\u{D}", "\u{308}", "\u{378}"]),
2400 ("\u{A}\u{20}", &["\u{A}", "\u{20}"]),
2401 ("\u{A}\u{308}\u{20}", &["\u{A}", "\u{308}", "\u{20}"]),
2402 ("\u{A}\u{D}", &["\u{A}", "\u{D}"]),
2403 ("\u{A}\u{308}\u{D}", &["\u{A}", "\u{308}", "\u{D}"]),
2404 ("\u{A}\u{A}", &["\u{A}", "\u{A}"]),
2405 ("\u{A}\u{308}\u{A}", &["\u{A}", "\u{308}", "\u{A}"]),
2406 ("\u{A}\u{1}", &["\u{A}", "\u{1}"]),
2407 ("\u{A}\u{308}\u{1}", &["\u{A}", "\u{308}", "\u{1}"]),
2408 ("\u{A}\u{300}", &["\u{A}", "\u{300}"]),
2409 ("\u{A}\u{308}\u{300}", &["\u{A}", "\u{308}\u{300}"]),
2410 ("\u{A}\u{903}", &["\u{A}", "\u{903}"]),
2411 ("\u{A}\u{1100}", &["\u{A}", "\u{1100}"]),
2412 ("\u{A}\u{308}\u{1100}", &["\u{A}", "\u{308}", "\u{1100}"]),
2413 ("\u{A}\u{1160}", &["\u{A}", "\u{1160}"]),
2414 ("\u{A}\u{308}\u{1160}", &["\u{A}", "\u{308}", "\u{1160}"]),
2415 ("\u{A}\u{11A8}", &["\u{A}", "\u{11A8}"]),
2416 ("\u{A}\u{308}\u{11A8}", &["\u{A}", "\u{308}", "\u{11A8}"]),
2417 ("\u{A}\u{AC00}", &["\u{A}", "\u{AC00}"]),
2418 ("\u{A}\u{308}\u{AC00}", &["\u{A}", "\u{308}", "\u{AC00}"]),
2419 ("\u{A}\u{AC01}", &["\u{A}", "\u{AC01}"]),
2420 ("\u{A}\u{308}\u{AC01}", &["\u{A}", "\u{308}", "\u{AC01}"]),
2421 ("\u{A}\u{1F1E6}", &["\u{A}", "\u{1F1E6}"]),
2422 ("\u{A}\u{308}\u{1F1E6}", &["\u{A}", "\u{308}", "\u{1F1E6}"]),
2423 ("\u{A}\u{378}", &["\u{A}", "\u{378}"]),
2424 ("\u{A}\u{308}\u{378}", &["\u{A}", "\u{308}", "\u{378}"]),
2425 ("\u{1}\u{20}", &["\u{1}", "\u{20}"]),
2426 ("\u{1}\u{308}\u{20}", &["\u{1}", "\u{308}", "\u{20}"]),
2427 ("\u{1}\u{D}", &["\u{1}", "\u{D}"]),
2428 ("\u{1}\u{308}\u{D}", &["\u{1}", "\u{308}", "\u{D}"]),
2429 ("\u{1}\u{A}", &["\u{1}", "\u{A}"]),
2430 ("\u{1}\u{308}\u{A}", &["\u{1}", "\u{308}", "\u{A}"]),
2431 ("\u{1}\u{1}", &["\u{1}", "\u{1}"]),
2432 ("\u{1}\u{308}\u{1}", &["\u{1}", "\u{308}", "\u{1}"]),
2433 ("\u{1}\u{300}", &["\u{1}", "\u{300}"]),
2434 ("\u{1}\u{308}\u{300}", &["\u{1}", "\u{308}\u{300}"]),
2435 ("\u{1}\u{903}", &["\u{1}", "\u{903}"]),
2436 ("\u{1}\u{1100}", &["\u{1}", "\u{1100}"]),
2437 ("\u{1}\u{308}\u{1100}", &["\u{1}", "\u{308}", "\u{1100}"]),
2438 ("\u{1}\u{1160}", &["\u{1}", "\u{1160}"]),
2439 ("\u{1}\u{308}\u{1160}", &["\u{1}", "\u{308}", "\u{1160}"]),
2440 ("\u{1}\u{11A8}", &["\u{1}", "\u{11A8}"]),
2441 ("\u{1}\u{308}\u{11A8}", &["\u{1}", "\u{308}", "\u{11A8}"]),
2442 ("\u{1}\u{AC00}", &["\u{1}", "\u{AC00}"]),
2443 ("\u{1}\u{308}\u{AC00}", &["\u{1}", "\u{308}", "\u{AC00}"]),
2444 ("\u{1}\u{AC01}", &["\u{1}", "\u{AC01}"]),
2445 ("\u{1}\u{308}\u{AC01}", &["\u{1}", "\u{308}", "\u{AC01}"]),
2446 ("\u{1}\u{1F1E6}", &["\u{1}", "\u{1F1E6}"]),
2447 ("\u{1}\u{308}\u{1F1E6}", &["\u{1}", "\u{308}", "\u{1F1E6}"]),
2448 ("\u{1}\u{378}", &["\u{1}", "\u{378}"]),
2449 ("\u{1}\u{308}\u{378}", &["\u{1}", "\u{308}", "\u{378}"]),
2450 ("\u{300}\u{20}", &["\u{300}", "\u{20}"]),
2451 ("\u{300}\u{308}\u{20}", &["\u{300}\u{308}", "\u{20}"]),
2452 ("\u{300}\u{D}", &["\u{300}", "\u{D}"]),
2453 ("\u{300}\u{308}\u{D}", &["\u{300}\u{308}", "\u{D}"]),
2454 ("\u{300}\u{A}", &["\u{300}", "\u{A}"]),
2455 ("\u{300}\u{308}\u{A}", &["\u{300}\u{308}", "\u{A}"]),
2456 ("\u{300}\u{1}", &["\u{300}", "\u{1}"]),
2457 ("\u{300}\u{308}\u{1}", &["\u{300}\u{308}", "\u{1}"]),
2458 ("\u{300}\u{300}", &["\u{300}\u{300}"]),
2459 ("\u{300}\u{308}\u{300}", &["\u{300}\u{308}\u{300}"]),
2460 ("\u{300}\u{1100}", &["\u{300}", "\u{1100}"]),
2461 ("\u{300}\u{308}\u{1100}", &["\u{300}\u{308}", "\u{1100}"]),
2462 ("\u{300}\u{1160}", &["\u{300}", "\u{1160}"]),
2463 ("\u{300}\u{308}\u{1160}", &["\u{300}\u{308}", "\u{1160}"]),
2464 ("\u{300}\u{11A8}", &["\u{300}", "\u{11A8}"]),
2465 ("\u{300}\u{308}\u{11A8}", &["\u{300}\u{308}", "\u{11A8}"]),
2466 ("\u{300}\u{AC00}", &["\u{300}", "\u{AC00}"]),
2467 ("\u{300}\u{308}\u{AC00}", &["\u{300}\u{308}", "\u{AC00}"]),
2468 ("\u{300}\u{AC01}", &["\u{300}", "\u{AC01}"]),
2469 ("\u{300}\u{308}\u{AC01}", &["\u{300}\u{308}", "\u{AC01}"]),
2470 ("\u{300}\u{1F1E6}", &["\u{300}", "\u{1F1E6}"]),
2471 ("\u{300}\u{308}\u{1F1E6}", &["\u{300}\u{308}", "\u{1F1E6}"]),
2472 ("\u{300}\u{378}", &["\u{300}", "\u{378}"]),
2473 ("\u{300}\u{308}\u{378}", &["\u{300}\u{308}", "\u{378}"]),
2474 ("\u{903}\u{20}", &["\u{903}", "\u{20}"]),
2475 ("\u{903}\u{308}\u{20}", &["\u{903}\u{308}", "\u{20}"]),
2476 ("\u{903}\u{D}", &["\u{903}", "\u{D}"]),
2477 ("\u{903}\u{308}\u{D}", &["\u{903}\u{308}", "\u{D}"]),
2478 ("\u{903}\u{A}", &["\u{903}", "\u{A}"]),
2479 ("\u{903}\u{308}\u{A}", &["\u{903}\u{308}", "\u{A}"]),
2480 ("\u{903}\u{1}", &["\u{903}", "\u{1}"]),
2481 ("\u{903}\u{308}\u{1}", &["\u{903}\u{308}", "\u{1}"]),
2482 ("\u{903}\u{300}", &["\u{903}\u{300}"]),
2483 ("\u{903}\u{308}\u{300}", &["\u{903}\u{308}\u{300}"]),
2484 ("\u{903}\u{1100}", &["\u{903}", "\u{1100}"]),
2485 ("\u{903}\u{308}\u{1100}", &["\u{903}\u{308}", "\u{1100}"]),
2486 ("\u{903}\u{1160}", &["\u{903}", "\u{1160}"]),
2487 ("\u{903}\u{308}\u{1160}", &["\u{903}\u{308}", "\u{1160}"]),
2488 ("\u{903}\u{11A8}", &["\u{903}", "\u{11A8}"]),
2489 ("\u{903}\u{308}\u{11A8}", &["\u{903}\u{308}", "\u{11A8}"]),
2490 ("\u{903}\u{AC00}", &["\u{903}", "\u{AC00}"]),
2491 ("\u{903}\u{308}\u{AC00}", &["\u{903}\u{308}", "\u{AC00}"]),
2492 ("\u{903}\u{AC01}", &["\u{903}", "\u{AC01}"]),
2493 ("\u{903}\u{308}\u{AC01}", &["\u{903}\u{308}", "\u{AC01}"]),
2494 ("\u{903}\u{1F1E6}", &["\u{903}", "\u{1F1E6}"]),
2495 ("\u{903}\u{308}\u{1F1E6}", &["\u{903}\u{308}", "\u{1F1E6}"]),
2496 ("\u{903}\u{378}", &["\u{903}", "\u{378}"]),
2497 ("\u{903}\u{308}\u{378}", &["\u{903}\u{308}", "\u{378}"]),
2498 ("\u{1100}\u{20}", &["\u{1100}", "\u{20}"]),
2499 ("\u{1100}\u{308}\u{20}", &["\u{1100}\u{308}", "\u{20}"]),
2500 ("\u{1100}\u{D}", &["\u{1100}", "\u{D}"]),
2501 ("\u{1100}\u{308}\u{D}", &["\u{1100}\u{308}", "\u{D}"]),
2502 ("\u{1100}\u{A}", &["\u{1100}", "\u{A}"]),
2503 ("\u{1100}\u{308}\u{A}", &["\u{1100}\u{308}", "\u{A}"]),
2504 ("\u{1100}\u{1}", &["\u{1100}", "\u{1}"]),
2505 ("\u{1100}\u{308}\u{1}", &["\u{1100}\u{308}", "\u{1}"]),
2506 ("\u{1100}\u{300}", &["\u{1100}\u{300}"]),
2507 ("\u{1100}\u{308}\u{300}", &["\u{1100}\u{308}\u{300}"]),
2508 ("\u{1100}\u{1100}", &["\u{1100}\u{1100}"]),
2509 ("\u{1100}\u{308}\u{1100}", &["\u{1100}\u{308}", "\u{1100}"]),
2510 ("\u{1100}\u{1160}", &["\u{1100}\u{1160}"]),
2511 ("\u{1100}\u{308}\u{1160}", &["\u{1100}\u{308}", "\u{1160}"]),
2512 ("\u{1100}\u{11A8}", &["\u{1100}", "\u{11A8}"]),
2513 ("\u{1100}\u{308}\u{11A8}", &["\u{1100}\u{308}", "\u{11A8}"]),
2514 ("\u{1100}\u{AC00}", &["\u{1100}\u{AC00}"]),
2515 ("\u{1100}\u{308}\u{AC00}", &["\u{1100}\u{308}", "\u{AC00}"]),
2516 ("\u{1100}\u{AC01}", &["\u{1100}\u{AC01}"]),
2517 ("\u{1100}\u{308}\u{AC01}", &["\u{1100}\u{308}", "\u{AC01}"]),
2518 ("\u{1100}\u{1F1E6}", &["\u{1100}", "\u{1F1E6}"]),
2519 ("\u{1100}\u{308}\u{1F1E6}", &["\u{1100}\u{308}", "\u{1F1E6}"]),
2520 ("\u{1100}\u{378}", &["\u{1100}", "\u{378}"]),
2521 ("\u{1100}\u{308}\u{378}", &["\u{1100}\u{308}", "\u{378}"]),
2522 ("\u{1160}\u{20}", &["\u{1160}", "\u{20}"]),
2523 ("\u{1160}\u{308}\u{20}", &["\u{1160}\u{308}", "\u{20}"]),
2524 ("\u{1160}\u{D}", &["\u{1160}", "\u{D}"]),
2525 ("\u{1160}\u{308}\u{D}", &["\u{1160}\u{308}", "\u{D}"]),
2526 ("\u{1160}\u{A}", &["\u{1160}", "\u{A}"]),
2527 ("\u{1160}\u{308}\u{A}", &["\u{1160}\u{308}", "\u{A}"]),
2528 ("\u{1160}\u{1}", &["\u{1160}", "\u{1}"]),
2529 ("\u{1160}\u{308}\u{1}", &["\u{1160}\u{308}", "\u{1}"]),
2530 ("\u{1160}\u{300}", &["\u{1160}\u{300}"]),
2531 ("\u{1160}\u{308}\u{300}", &["\u{1160}\u{308}\u{300}"]),
2532 ("\u{1160}\u{1100}", &["\u{1160}", "\u{1100}"]),
2533 ("\u{1160}\u{308}\u{1100}", &["\u{1160}\u{308}", "\u{1100}"]),
2534 ("\u{1160}\u{1160}", &["\u{1160}\u{1160}"]),
2535 ("\u{1160}\u{308}\u{1160}", &["\u{1160}\u{308}", "\u{1160}"]),
2536 ("\u{1160}\u{11A8}", &["\u{1160}\u{11A8}"]),
2537 ("\u{1160}\u{308}\u{11A8}", &["\u{1160}\u{308}", "\u{11A8}"]),
2538 ("\u{1160}\u{AC00}", &["\u{1160}", "\u{AC00}"]),
2539 ("\u{1160}\u{308}\u{AC00}", &["\u{1160}\u{308}", "\u{AC00}"]),
2540 ("\u{1160}\u{AC01}", &["\u{1160}", "\u{AC01}"]),
2541 ("\u{1160}\u{308}\u{AC01}", &["\u{1160}\u{308}", "\u{AC01}"]),
2542 ("\u{1160}\u{1F1E6}", &["\u{1160}", "\u{1F1E6}"]),
2543 ("\u{1160}\u{308}\u{1F1E6}", &["\u{1160}\u{308}", "\u{1F1E6}"]),
2544 ("\u{1160}\u{378}", &["\u{1160}", "\u{378}"]),
2545 ("\u{1160}\u{308}\u{378}", &["\u{1160}\u{308}", "\u{378}"]),
2546 ("\u{11A8}\u{20}", &["\u{11A8}", "\u{20}"]),
2547 ("\u{11A8}\u{308}\u{20}", &["\u{11A8}\u{308}", "\u{20}"]),
2548 ("\u{11A8}\u{D}", &["\u{11A8}", "\u{D}"]),
2549 ("\u{11A8}\u{308}\u{D}", &["\u{11A8}\u{308}", "\u{D}"]),
2550 ("\u{11A8}\u{A}", &["\u{11A8}", "\u{A}"]),
2551 ("\u{11A8}\u{308}\u{A}", &["\u{11A8}\u{308}", "\u{A}"]),
2552 ("\u{11A8}\u{1}", &["\u{11A8}", "\u{1}"]),
2553 ("\u{11A8}\u{308}\u{1}", &["\u{11A8}\u{308}", "\u{1}"]),
2554 ("\u{11A8}\u{300}", &["\u{11A8}\u{300}"]),
2555 ("\u{11A8}\u{308}\u{300}", &["\u{11A8}\u{308}\u{300}"]),
2556 ("\u{11A8}\u{1100}", &["\u{11A8}", "\u{1100}"]),
2557 ("\u{11A8}\u{308}\u{1100}", &["\u{11A8}\u{308}", "\u{1100}"]),
2558 ("\u{11A8}\u{1160}", &["\u{11A8}", "\u{1160}"]),
2559 ("\u{11A8}\u{308}\u{1160}", &["\u{11A8}\u{308}", "\u{1160}"]),
2560 ("\u{11A8}\u{11A8}", &["\u{11A8}\u{11A8}"]),
2561 ("\u{11A8}\u{308}\u{11A8}", &["\u{11A8}\u{308}", "\u{11A8}"]),
2562 ("\u{11A8}\u{AC00}", &["\u{11A8}", "\u{AC00}"]),
2563 ("\u{11A8}\u{308}\u{AC00}", &["\u{11A8}\u{308}", "\u{AC00}"]),
2564 ("\u{11A8}\u{AC01}", &["\u{11A8}", "\u{AC01}"]),
2565 ("\u{11A8}\u{308}\u{AC01}", &["\u{11A8}\u{308}", "\u{AC01}"]),
2566 ("\u{11A8}\u{1F1E6}", &["\u{11A8}", "\u{1F1E6}"]),
2567 ("\u{11A8}\u{308}\u{1F1E6}", &["\u{11A8}\u{308}", "\u{1F1E6}"]),
2568 ("\u{11A8}\u{378}", &["\u{11A8}", "\u{378}"]),
2569 ("\u{11A8}\u{308}\u{378}", &["\u{11A8}\u{308}", "\u{378}"]),
2570 ("\u{AC00}\u{20}", &["\u{AC00}", "\u{20}"]),
2571 ("\u{AC00}\u{308}\u{20}", &["\u{AC00}\u{308}", "\u{20}"]),
2572 ("\u{AC00}\u{D}", &["\u{AC00}", "\u{D}"]),
2573 ("\u{AC00}\u{308}\u{D}", &["\u{AC00}\u{308}", "\u{D}"]),
2574 ("\u{AC00}\u{A}", &["\u{AC00}", "\u{A}"]),
2575 ("\u{AC00}\u{308}\u{A}", &["\u{AC00}\u{308}", "\u{A}"]),
2576 ("\u{AC00}\u{1}", &["\u{AC00}", "\u{1}"]),
2577 ("\u{AC00}\u{308}\u{1}", &["\u{AC00}\u{308}", "\u{1}"]),
2578 ("\u{AC00}\u{300}", &["\u{AC00}\u{300}"]),
2579 ("\u{AC00}\u{308}\u{300}", &["\u{AC00}\u{308}\u{300}"]),
2580 ("\u{AC00}\u{1100}", &["\u{AC00}", "\u{1100}"]),
2581 ("\u{AC00}\u{308}\u{1100}", &["\u{AC00}\u{308}", "\u{1100}"]),
2582 ("\u{AC00}\u{1160}", &["\u{AC00}\u{1160}"]),
2583 ("\u{AC00}\u{308}\u{1160}", &["\u{AC00}\u{308}", "\u{1160}"]),
2584 ("\u{AC00}\u{11A8}", &["\u{AC00}\u{11A8}"]),
2585 ("\u{AC00}\u{308}\u{11A8}", &["\u{AC00}\u{308}", "\u{11A8}"]),
2586 ("\u{AC00}\u{AC00}", &["\u{AC00}", "\u{AC00}"]),
2587 ("\u{AC00}\u{308}\u{AC00}", &["\u{AC00}\u{308}", "\u{AC00}"]),
2588 ("\u{AC00}\u{AC01}", &["\u{AC00}", "\u{AC01}"]),
2589 ("\u{AC00}\u{308}\u{AC01}", &["\u{AC00}\u{308}", "\u{AC01}"]),
2590 ("\u{AC00}\u{1F1E6}", &["\u{AC00}", "\u{1F1E6}"]),
2591 ("\u{AC00}\u{308}\u{1F1E6}", &["\u{AC00}\u{308}", "\u{1F1E6}"]),
2592 ("\u{AC00}\u{378}", &["\u{AC00}", "\u{378}"]),
2593 ("\u{AC00}\u{308}\u{378}", &["\u{AC00}\u{308}", "\u{378}"]),
2594 ("\u{AC01}\u{20}", &["\u{AC01}", "\u{20}"]),
2595 ("\u{AC01}\u{308}\u{20}", &["\u{AC01}\u{308}", "\u{20}"]),
2596 ("\u{AC01}\u{D}", &["\u{AC01}", "\u{D}"]),
2597 ("\u{AC01}\u{308}\u{D}", &["\u{AC01}\u{308}", "\u{D}"]),
2598 ("\u{AC01}\u{A}", &["\u{AC01}", "\u{A}"]),
2599 ("\u{AC01}\u{308}\u{A}", &["\u{AC01}\u{308}", "\u{A}"]),
2600 ("\u{AC01}\u{1}", &["\u{AC01}", "\u{1}"]),
2601 ("\u{AC01}\u{308}\u{1}", &["\u{AC01}\u{308}", "\u{1}"]),
2602 ("\u{AC01}\u{300}", &["\u{AC01}\u{300}"]),
2603 ("\u{AC01}\u{308}\u{300}", &["\u{AC01}\u{308}\u{300}"]),
2604 ("\u{AC01}\u{1100}", &["\u{AC01}", "\u{1100}"]),
2605 ("\u{AC01}\u{308}\u{1100}", &["\u{AC01}\u{308}", "\u{1100}"]),
2606 ("\u{AC01}\u{1160}", &["\u{AC01}", "\u{1160}"]),
2607 ("\u{AC01}\u{308}\u{1160}", &["\u{AC01}\u{308}", "\u{1160}"]),
2608 ("\u{AC01}\u{11A8}", &["\u{AC01}\u{11A8}"]),
2609 ("\u{AC01}\u{308}\u{11A8}", &["\u{AC01}\u{308}", "\u{11A8}"]),
2610 ("\u{AC01}\u{AC00}", &["\u{AC01}", "\u{AC00}"]),
2611 ("\u{AC01}\u{308}\u{AC00}", &["\u{AC01}\u{308}", "\u{AC00}"]),
2612 ("\u{AC01}\u{AC01}", &["\u{AC01}", "\u{AC01}"]),
2613 ("\u{AC01}\u{308}\u{AC01}", &["\u{AC01}\u{308}", "\u{AC01}"]),
2614 ("\u{AC01}\u{1F1E6}", &["\u{AC01}", "\u{1F1E6}"]),
2615 ("\u{AC01}\u{308}\u{1F1E6}", &["\u{AC01}\u{308}", "\u{1F1E6}"]),
2616 ("\u{AC01}\u{378}", &["\u{AC01}", "\u{378}"]),
2617 ("\u{AC01}\u{308}\u{378}", &["\u{AC01}\u{308}", "\u{378}"]),
2618 ("\u{1F1E6}\u{20}", &["\u{1F1E6}", "\u{20}"]),
2619 ("\u{1F1E6}\u{308}\u{20}", &["\u{1F1E6}\u{308}", "\u{20}"]),
2620 ("\u{1F1E6}\u{D}", &["\u{1F1E6}", "\u{D}"]),
2621 ("\u{1F1E6}\u{308}\u{D}", &["\u{1F1E6}\u{308}", "\u{D}"]),
2622 ("\u{1F1E6}\u{A}", &["\u{1F1E6}", "\u{A}"]),
2623 ("\u{1F1E6}\u{308}\u{A}", &["\u{1F1E6}\u{308}", "\u{A}"]),
2624 ("\u{1F1E6}\u{1}", &["\u{1F1E6}", "\u{1}"]),
2625 ("\u{1F1E6}\u{308}\u{1}", &["\u{1F1E6}\u{308}", "\u{1}"]),
2626 ("\u{1F1E6}\u{300}", &["\u{1F1E6}\u{300}"]),
2627 ("\u{1F1E6}\u{308}\u{300}", &["\u{1F1E6}\u{308}\u{300}"]),
2628 ("\u{1F1E6}\u{1100}", &["\u{1F1E6}", "\u{1100}"]),
2629 ("\u{1F1E6}\u{308}\u{1100}", &["\u{1F1E6}\u{308}", "\u{1100}"]),
2630 ("\u{1F1E6}\u{1160}", &["\u{1F1E6}", "\u{1160}"]),
2631 ("\u{1F1E6}\u{308}\u{1160}", &["\u{1F1E6}\u{308}", "\u{1160}"]),
2632 ("\u{1F1E6}\u{11A8}", &["\u{1F1E6}", "\u{11A8}"]),
2633 ("\u{1F1E6}\u{308}\u{11A8}", &["\u{1F1E6}\u{308}", "\u{11A8}"]),
2634 ("\u{1F1E6}\u{AC00}", &["\u{1F1E6}", "\u{AC00}"]),
2635 ("\u{1F1E6}\u{308}\u{AC00}", &["\u{1F1E6}\u{308}", "\u{AC00}"]),
2636 ("\u{1F1E6}\u{AC01}", &["\u{1F1E6}", "\u{AC01}"]),
2637 ("\u{1F1E6}\u{308}\u{AC01}", &["\u{1F1E6}\u{308}", "\u{AC01}"]),
2638 ("\u{1F1E6}\u{1F1E6}", &["\u{1F1E6}\u{1F1E6}"]),
2639 ("\u{1F1E6}\u{308}\u{1F1E6}", &["\u{1F1E6}\u{308}", "\u{1F1E6}"]),
2640 ("\u{1F1E6}\u{378}", &["\u{1F1E6}", "\u{378}"]),
2641 ("\u{1F1E6}\u{308}\u{378}", &["\u{1F1E6}\u{308}", "\u{378}"]),
2642 ("\u{378}\u{20}", &["\u{378}", "\u{20}"]),
2643 ("\u{378}\u{308}\u{20}", &["\u{378}\u{308}", "\u{20}"]),
2644 ("\u{378}\u{D}", &["\u{378}", "\u{D}"]),
2645 ("\u{378}\u{308}\u{D}", &["\u{378}\u{308}", "\u{D}"]),
2646 ("\u{378}\u{A}", &["\u{378}", "\u{A}"]),
2647 ("\u{378}\u{308}\u{A}", &["\u{378}\u{308}", "\u{A}"]),
2648 ("\u{378}\u{1}", &["\u{378}", "\u{1}"]),
2649 ("\u{378}\u{308}\u{1}", &["\u{378}\u{308}", "\u{1}"]),
2650 ("\u{378}\u{300}", &["\u{378}\u{300}"]),
2651 ("\u{378}\u{308}\u{300}", &["\u{378}\u{308}\u{300}"]),
2652 ("\u{378}\u{1100}", &["\u{378}", "\u{1100}"]),
2653 ("\u{378}\u{308}\u{1100}", &["\u{378}\u{308}", "\u{1100}"]),
2654 ("\u{378}\u{1160}", &["\u{378}", "\u{1160}"]),
2655 ("\u{378}\u{308}\u{1160}", &["\u{378}\u{308}", "\u{1160}"]),
2656 ("\u{378}\u{11A8}", &["\u{378}", "\u{11A8}"]),
2657 ("\u{378}\u{308}\u{11A8}", &["\u{378}\u{308}", "\u{11A8}"]),
2658 ("\u{378}\u{AC00}", &["\u{378}", "\u{AC00}"]),
2659 ("\u{378}\u{308}\u{AC00}", &["\u{378}\u{308}", "\u{AC00}"]),
2660 ("\u{378}\u{AC01}", &["\u{378}", "\u{AC01}"]),
2661 ("\u{378}\u{308}\u{AC01}", &["\u{378}\u{308}", "\u{AC01}"]),
2662 ("\u{378}\u{1F1E6}", &["\u{378}", "\u{1F1E6}"]),
2663 ("\u{378}\u{308}\u{1F1E6}", &["\u{378}\u{308}", "\u{1F1E6}"]),
2664 ("\u{378}\u{378}", &["\u{378}", "\u{378}"]),
2665 ("\u{378}\u{308}\u{378}", &["\u{378}\u{308}", "\u{378}"]),
2666 ("\u{61}\u{1F1E6}\u{62}", &["\u{61}", "\u{1F1E6}", "\u{62}"]),
2667 ("\u{1F1F7}\u{1F1FA}", &["\u{1F1F7}\u{1F1FA}"]),
2668 ("\u{1F1F7}\u{1F1FA}\u{1F1F8}", &["\u{1F1F7}\u{1F1FA}\u{1F1F8}"]),
2669 ("\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}",
2670 &["\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}"]),
2671 ("\u{1F1F7}\u{1F1FA}\u{200B}\u{1F1F8}\u{1F1EA}",
2672 &["\u{1F1F7}\u{1F1FA}", "\u{200B}", "\u{1F1F8}\u{1F1EA}"]),
2673 ("\u{1F1E6}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{1F1E7}\u{1F1E8}"]),
2674 ("\u{1F1E6}\u{200D}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{200D}",
2675 "\u{1F1E7}\u{1F1E8}"]),
2676 ("\u{1F1E6}\u{1F1E7}\u{200D}\u{1F1E8}",
2677 &["\u{1F1E6}\u{1F1E7}\u{200D}", "\u{1F1E8}"]),
2678 ("\u{20}\u{200D}\u{646}", &["\u{20}\u{200D}", "\u{646}"]),
2679 ("\u{646}\u{200D}\u{20}", &["\u{646}\u{200D}", "\u{20}"]),
2682 let test_diff: [(_, &[_], &[_]); 23] = [
2683 ("\u{20}\u{903}", &["\u{20}\u{903}"], &["\u{20}", "\u{903}"]), ("\u{20}\u{308}\u{903}",
2684 &["\u{20}\u{308}\u{903}"], &["\u{20}\u{308}", "\u{903}"]), ("\u{D}\u{308}\u{903}",
2685 &["\u{D}", "\u{308}\u{903}"], &["\u{D}", "\u{308}", "\u{903}"]), ("\u{A}\u{308}\u{903}",
2686 &["\u{A}", "\u{308}\u{903}"], &["\u{A}", "\u{308}", "\u{903}"]), ("\u{1}\u{308}\u{903}",
2687 &["\u{1}", "\u{308}\u{903}"], &["\u{1}", "\u{308}", "\u{903}"]), ("\u{300}\u{903}",
2688 &["\u{300}\u{903}"], &["\u{300}", "\u{903}"]), ("\u{300}\u{308}\u{903}",
2689 &["\u{300}\u{308}\u{903}"], &["\u{300}\u{308}", "\u{903}"]), ("\u{903}\u{903}",
2690 &["\u{903}\u{903}"], &["\u{903}", "\u{903}"]), ("\u{903}\u{308}\u{903}",
2691 &["\u{903}\u{308}\u{903}"], &["\u{903}\u{308}", "\u{903}"]), ("\u{1100}\u{903}",
2692 &["\u{1100}\u{903}"], &["\u{1100}", "\u{903}"]), ("\u{1100}\u{308}\u{903}",
2693 &["\u{1100}\u{308}\u{903}"], &["\u{1100}\u{308}", "\u{903}"]), ("\u{1160}\u{903}",
2694 &["\u{1160}\u{903}"], &["\u{1160}", "\u{903}"]), ("\u{1160}\u{308}\u{903}",
2695 &["\u{1160}\u{308}\u{903}"], &["\u{1160}\u{308}", "\u{903}"]), ("\u{11A8}\u{903}",
2696 &["\u{11A8}\u{903}"], &["\u{11A8}", "\u{903}"]), ("\u{11A8}\u{308}\u{903}",
2697 &["\u{11A8}\u{308}\u{903}"], &["\u{11A8}\u{308}", "\u{903}"]), ("\u{AC00}\u{903}",
2698 &["\u{AC00}\u{903}"], &["\u{AC00}", "\u{903}"]), ("\u{AC00}\u{308}\u{903}",
2699 &["\u{AC00}\u{308}\u{903}"], &["\u{AC00}\u{308}", "\u{903}"]), ("\u{AC01}\u{903}",
2700 &["\u{AC01}\u{903}"], &["\u{AC01}", "\u{903}"]), ("\u{AC01}\u{308}\u{903}",
2701 &["\u{AC01}\u{308}\u{903}"], &["\u{AC01}\u{308}", "\u{903}"]), ("\u{1F1E6}\u{903}",
2702 &["\u{1F1E6}\u{903}"], &["\u{1F1E6}", "\u{903}"]), ("\u{1F1E6}\u{308}\u{903}",
2703 &["\u{1F1E6}\u{308}\u{903}"], &["\u{1F1E6}\u{308}", "\u{903}"]), ("\u{378}\u{903}",
2704 &["\u{378}\u{903}"], &["\u{378}", "\u{903}"]), ("\u{378}\u{308}\u{903}",
2705 &["\u{378}\u{308}\u{903}"], &["\u{378}\u{308}", "\u{903}"]),
2708 for &(s, g) in &test_same[..] {
2709 // test forward iterator
2710 assert!(order::equals(s.graphemes(true), g.iter().cloned()));
2711 assert!(order::equals(s.graphemes(false), g.iter().cloned()));
2713 // test reverse iterator
2714 assert!(order::equals(s.graphemes(true).rev(), g.iter().rev().cloned()));
2715 assert!(order::equals(s.graphemes(false).rev(), g.iter().rev().cloned()));
2718 for &(s, gt, gf) in &test_diff {
2719 // test forward iterator
2720 assert!(order::equals(s.graphemes(true), gt.iter().cloned()));
2721 assert!(order::equals(s.graphemes(false), gf.iter().cloned()));
2723 // test reverse iterator
2724 assert!(order::equals(s.graphemes(true).rev(), gt.iter().rev().cloned()));
2725 assert!(order::equals(s.graphemes(false).rev(), gf.iter().rev().cloned()));
2728 // test the indices iterators
2729 let s = "a̐éö̲\r\n";
2730 let gr_inds = s.grapheme_indices(true).collect::<Vec<(usize, &str)>>();
2731 let b: &[_] = &[(0, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
2732 assert_eq!(gr_inds, b);
2733 let gr_inds = s.grapheme_indices(true).rev().collect::<Vec<(usize, &str)>>();
2734 let b: &[_] = &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0, "a̐")];
2735 assert_eq!(gr_inds, b);
2736 let mut gr_inds_iter = s.grapheme_indices(true);
2738 let gr_inds = gr_inds_iter.by_ref();
2739 let e1 = gr_inds.size_hint();
2740 assert_eq!(e1, (1, Some(13)));
2741 let c = gr_inds.count();
2744 let e2 = gr_inds_iter.size_hint();
2745 assert_eq!(e2, (0, Some(0)));
2747 // make sure the reverse iterator does the right thing with "\n" at beginning of string
2749 let gr = s.graphemes(true).rev().collect::<Vec<&str>>();
2750 let b: &[_] = &["\r", "\r\n", "\n"];
2755 fn test_split_strator() {
2756 fn t(s: &str, sep: &str, u: &[&str]) {
2757 let v: Vec<&str> = s.split_str(sep).collect();
2760 t("--1233345--", "12345", &["--1233345--"]);
2761 t("abc::hello::there", "::", &["abc", "hello", "there"]);
2762 t("::hello::there", "::", &["", "hello", "there"]);
2763 t("hello::there::", "::", &["hello", "there", ""]);
2764 t("::hello::there::", "::", &["", "hello", "there", ""]);
2765 t("ประเทศไทย中华Việt Nam", "中华", &["ประเทศไทย", "Việt Nam"]);
2766 t("zzXXXzzYYYzz", "zz", &["", "XXX", "YYY", ""]);
2767 t("zzXXXzYYYz", "XXX", &["zz", "zYYYz"]);
2768 t(".XXX.YYY.", ".", &["", "XXX", "YYY", ""]);
2770 t("zz", "zz", &["",""]);
2771 t("ok", "z", &["ok"]);
2772 t("zzz", "zz", &["","z"]);
2773 t("zzzzz", "zz", &["","","z"]);
2777 fn test_str_default() {
2778 use core::default::Default;
2779 fn t<S: Default + Str>() {
2780 let s: S = Default::default();
2781 assert_eq!(s.as_slice(), "");
2789 fn test_str_container() {
2790 fn sum_len(v: &[&str]) -> usize {
2791 v.iter().map(|x| x.len()).sum()
2794 let s = String::from_str("01234");
2795 assert_eq!(5, sum_len(&["012", "", "34"]));
2796 assert_eq!(5, sum_len(&[&String::from_str("01"),
2797 &String::from_str("2"),
2798 &String::from_str("34"),
2799 &String::from_str("")]));
2800 assert_eq!(5, sum_len(&[&s]));
2804 fn test_str_from_utf8() {
2806 assert_eq!(from_utf8(xs), Ok("hello"));
2808 let xs = "ศไทย中华Việt Nam".as_bytes();
2809 assert_eq!(from_utf8(xs), Ok("ศไทย中华Việt Nam"));
2811 let xs = b"hello\xFF";
2812 assert_eq!(from_utf8(xs), Err(Utf8Error::TooShort));
2819 use prelude::{SliceExt, IteratorExt, SliceConcatExt};
2821 use test::black_box;
2824 fn char_iterator(b: &mut Bencher) {
2825 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2827 b.iter(|| s.chars().count());
2831 fn char_iterator_for(b: &mut Bencher) {
2832 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2835 for ch in s.chars() { black_box(ch); }
2840 fn char_iterator_ascii(b: &mut Bencher) {
2841 let s = "Mary had a little lamb, Little lamb
2842 Mary had a little lamb, Little lamb
2843 Mary had a little lamb, Little lamb
2844 Mary had a little lamb, Little lamb
2845 Mary had a little lamb, Little lamb
2846 Mary had a little lamb, Little lamb";
2848 b.iter(|| s.chars().count());
2852 fn char_iterator_rev(b: &mut Bencher) {
2853 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2855 b.iter(|| s.chars().rev().count());
2859 fn char_iterator_rev_for(b: &mut Bencher) {
2860 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2863 for ch in s.chars().rev() { black_box(ch); }
2868 fn char_indicesator(b: &mut Bencher) {
2869 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2870 let len = s.chars().count();
2872 b.iter(|| assert_eq!(s.char_indices().count(), len));
2876 fn char_indicesator_rev(b: &mut Bencher) {
2877 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2878 let len = s.chars().count();
2880 b.iter(|| assert_eq!(s.char_indices().rev().count(), len));
2884 fn split_unicode_ascii(b: &mut Bencher) {
2885 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2887 b.iter(|| assert_eq!(s.split('V').count(), 3));
2891 fn split_unicode_not_ascii(b: &mut Bencher) {
2892 struct NotAscii(char);
2893 impl CharEq for NotAscii {
2894 fn matches(&mut self, c: char) -> bool {
2895 let NotAscii(cc) = *self;
2898 fn only_ascii(&self) -> bool { false }
2900 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2902 b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
2907 fn split_ascii(b: &mut Bencher) {
2908 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2909 let len = s.split(' ').count();
2911 b.iter(|| assert_eq!(s.split(' ').count(), len));
2915 fn split_not_ascii(b: &mut Bencher) {
2916 struct NotAscii(char);
2917 impl CharEq for NotAscii {
2919 fn matches(&mut self, c: char) -> bool {
2920 let NotAscii(cc) = *self;
2923 fn only_ascii(&self) -> bool { false }
2925 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2926 let len = s.split(' ').count();
2928 b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
2932 fn split_extern_fn(b: &mut Bencher) {
2933 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2934 let len = s.split(' ').count();
2935 fn pred(c: char) -> bool { c == ' ' }
2937 b.iter(|| assert_eq!(s.split(pred).count(), len));
2941 fn split_closure(b: &mut Bencher) {
2942 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2943 let len = s.split(' ').count();
2945 b.iter(|| assert_eq!(s.split(|c: char| c == ' ').count(), len));
2949 fn split_slice(b: &mut Bencher) {
2950 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2951 let len = s.split(' ').count();
2953 let c: &[char] = &[' '];
2954 b.iter(|| assert_eq!(s.split(c).count(), len));
2958 fn bench_connect(b: &mut Bencher) {
2959 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2961 let v = vec![s, s, s, s, s, s, s, s, s, s];
2963 assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
2968 fn bench_contains_short_short(b: &mut Bencher) {
2969 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2973 assert!(haystack.contains(needle));
2978 fn bench_contains_short_long(b: &mut Bencher) {
2980 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
2981 ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
2982 eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
2983 sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
2984 tempus vel, gravida nec quam.
2986 In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
2987 sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
2988 diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
2989 lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
2990 eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
2991 interdum. Curabitur ut nisi justo.
2993 Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
2994 mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
2995 lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
2996 est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
2997 felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
2998 ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
2999 feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
3000 Aliquam sit amet placerat lorem.
3002 Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
3003 mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
3004 Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
3005 lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
3006 suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
3009 Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
3010 feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
3011 vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
3012 leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
3013 malesuada sollicitudin quam eu fermentum.";
3014 let needle = "english";
3017 assert!(!haystack.contains(needle));
3022 fn bench_contains_bad_naive(b: &mut Bencher) {
3023 let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
3024 let needle = "aaaaaaaab";
3027 assert!(!haystack.contains(needle));
3032 fn bench_contains_equal(b: &mut Bencher) {
3033 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3034 let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3037 assert!(haystack.contains(needle));