1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 // ignore-lexer-test FIXME #15679
13 //! Unicode string manipulation (`str` type)
17 //! Rust's string type is one of the core primitive types of the language. While
18 //! represented by the name `str`, the name `str` is not actually a valid type in
19 //! Rust. Each string must also be decorated with a pointer. `String` is used
20 //! for an owned string, so there is only one commonly-used `str` type in Rust:
23 //! `&str` is the borrowed string type. This type of string can only be created
24 //! from other strings, unless it is a static string (see below). As the word
25 //! "borrowed" implies, this type of string is owned elsewhere, and this string
26 //! cannot be moved out of.
28 //! As an example, here's some code that uses a string.
32 //! let borrowed_string = "This string is borrowed with the 'static lifetime";
36 //! From the example above, you can guess that Rust's string literals have the
37 //! `'static` lifetime. This is akin to C's concept of a static string.
38 //! More precisely, string literals are immutable views with a `'static` lifetime
39 //! (otherwise known as the lifetime of the entire program), and thus have the
40 //! type `&'static str`.
44 //! Rust's string type, `str`, is a sequence of Unicode scalar values encoded as a
45 //! stream of UTF-8 bytes. All [strings](../../reference.html#literals) are
46 //! guaranteed to be validly encoded UTF-8 sequences. Additionally, strings are
47 //! not null-terminated and can thus contain null bytes.
49 //! The actual representation of a string maps directly onto a slice: `&str`
50 //! has the same representation as `&[u8]`.
52 #![doc(primitive = "str")]
55 use self::RecompositionState::*;
56 use self::DecompositionType::*;
58 use core::borrow::{BorrowFrom, ToOwned};
59 use core::char::CharExt;
60 use core::clone::Clone;
61 use core::iter::AdditiveIterator;
62 use core::iter::{range, Iterator, IteratorExt};
64 use core::option::Option::{self, Some, None};
65 use core::slice::AsSlice;
66 use core::str as core_str;
67 use unicode::str::{UnicodeStr, Utf16Encoder};
69 use ring_buf::RingBuf;
74 use slice::SliceConcatExt;
76 pub use core::str::{FromStr, Utf8Error, Str};
77 pub use core::str::{Lines, LinesAny, MatchIndices, SplitStr, CharRange};
78 pub use core::str::{Split, SplitTerminator};
79 pub use core::str::{SplitN, RSplitN};
80 pub use core::str::{from_utf8, CharEq, Chars, CharIndices, Bytes};
81 pub use core::str::{from_utf8_unchecked, from_c_str};
82 pub use unicode::str::{Words, Graphemes, GraphemeIndices};
85 Section: Creating a string
88 impl<S: Str> SliceConcatExt<str, String> for [S] {
89 fn concat(&self) -> String {
90 let s = self.as_slice();
96 // `len` calculation may overflow but push_str will check boundaries
97 let len = s.iter().map(|s| s.as_slice().len()).sum();
98 let mut result = String::with_capacity(len);
101 result.push_str(s.as_slice())
107 fn connect(&self, sep: &str) -> String {
108 let s = self.as_slice();
111 return String::new();
119 // this is wrong without the guarantee that `self` is non-empty
120 // `len` calculation may overflow but push_str will check boundaries
121 let len = sep.len() * (s.len() - 1)
122 + s.iter().map(|s| s.as_slice().len()).sum();
123 let mut result = String::with_capacity(len);
124 let mut first = true;
130 result.push_str(sep);
132 result.push_str(s.as_slice());
142 // Helper functions used for Unicode normalization
143 fn canonical_sort(comb: &mut [(char, u8)]) {
144 let len = comb.len();
145 for i in range(0, len) {
146 let mut swapped = false;
147 for j in range(1, len-i) {
148 let class_a = comb[j-1].1;
149 let class_b = comb[j].1;
150 if class_a != 0 && class_b != 0 && class_a > class_b {
155 if !swapped { break; }
160 enum DecompositionType {
165 /// External iterator for a string's decomposition's characters.
166 /// Use with the `std::iter` module.
169 pub struct Decompositions<'a> {
170 kind: DecompositionType,
172 buffer: Vec<(char, u8)>,
177 impl<'a> Iterator for Decompositions<'a> {
181 fn next(&mut self) -> Option<char> {
182 match self.buffer.first() {
185 self.buffer.remove(0);
188 Some(&(c, _)) if self.sorted => {
189 self.buffer.remove(0);
192 _ => self.sorted = false
196 for ch in self.iter {
197 let buffer = &mut self.buffer;
198 let sorted = &mut self.sorted;
200 let callback = |&mut: d| {
202 unicode::char::canonical_combining_class(d);
203 if class == 0 && !*sorted {
204 canonical_sort(buffer.as_mut_slice());
207 buffer.push((d, class));
211 unicode::char::decompose_canonical(ch, callback)
214 unicode::char::decompose_compatible(ch, callback)
225 canonical_sort(self.buffer.as_mut_slice());
229 if self.buffer.is_empty() {
232 match self.buffer.remove(0) {
242 fn size_hint(&self) -> (uint, Option<uint>) {
243 let (lower, _) = self.iter.size_hint();
249 enum RecompositionState {
255 /// External iterator for a string's recomposition's characters.
256 /// Use with the `std::iter` module.
259 pub struct Recompositions<'a> {
260 iter: Decompositions<'a>,
261 state: RecompositionState,
262 buffer: RingBuf<char>,
263 composee: Option<char>,
268 impl<'a> Iterator for Recompositions<'a> {
272 fn next(&mut self) -> Option<char> {
276 for ch in self.iter {
277 let ch_class = unicode::char::canonical_combining_class(ch);
278 if self.composee.is_none() {
282 self.composee = Some(ch);
285 let k = self.composee.clone().unwrap();
287 match self.last_ccc {
289 match unicode::char::compose(k, ch) {
291 self.composee = Some(r);
296 self.composee = Some(ch);
299 self.buffer.push_back(ch);
300 self.last_ccc = Some(ch_class);
305 if l_class >= ch_class {
306 // `ch` is blocked from `composee`
308 self.composee = Some(ch);
309 self.last_ccc = None;
310 self.state = Purging;
313 self.buffer.push_back(ch);
314 self.last_ccc = Some(ch_class);
317 match unicode::char::compose(k, ch) {
319 self.composee = Some(r);
323 self.buffer.push_back(ch);
324 self.last_ccc = Some(ch_class);
330 self.state = Finished;
331 if self.composee.is_some() {
332 return self.composee.take();
336 match self.buffer.pop_front() {
337 None => self.state = Composing,
342 match self.buffer.pop_front() {
343 None => return self.composee.take(),
352 /// External iterator for a string's UTF-16 code units.
353 /// Use with the `std::iter` module.
356 pub struct Utf16Units<'a> {
357 encoder: Utf16Encoder<Chars<'a>>
361 impl<'a> Iterator for Utf16Units<'a> {
365 fn next(&mut self) -> Option<u16> { self.encoder.next() }
368 fn size_hint(&self) -> (uint, Option<uint>) { self.encoder.size_hint() }
375 // Return the initial codepoint accumulator for the first byte.
376 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
377 // for width 3, and 3 bits for width 4
378 macro_rules! utf8_first_byte {
379 ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
382 // return the value of $ch updated with continuation byte $byte
383 macro_rules! utf8_acc_cont_byte {
384 ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
387 #[unstable = "trait is unstable"]
388 impl BorrowFrom<String> for str {
389 fn borrow_from(owned: &String) -> &str { owned[] }
392 #[unstable = "trait is unstable"]
393 impl ToOwned<String> for str {
394 fn to_owned(&self) -> String {
396 String::from_utf8_unchecked(self.as_bytes().to_owned())
406 Section: Trait implementations
409 /// Any string that can be represented as a slice.
411 pub trait StrExt: ops::Slice<uint, str> {
412 /// Escapes each char in `s` with `char::escape_default`.
413 #[unstable = "return type may change to be an iterator"]
414 fn escape_default(&self) -> String {
415 self.chars().flat_map(|c| c.escape_default()).collect()
418 /// Escapes each char in `s` with `char::escape_unicode`.
419 #[unstable = "return type may change to be an iterator"]
420 fn escape_unicode(&self) -> String {
421 self.chars().flat_map(|c| c.escape_unicode()).collect()
424 /// Replaces all occurrences of one string with another.
428 /// * `from` - The string to replace
429 /// * `to` - The replacement string
433 /// The original string with all occurrences of `from` replaced with `to`.
438 /// let s = "Do you know the muffin man,
439 /// The muffin man, the muffin man, ...".to_string();
441 /// assert_eq!(s.replace("muffin man", "little lamb"),
442 /// "Do you know the little lamb,
443 /// The little lamb, the little lamb, ...".to_string());
445 /// // not found, so no change.
446 /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
449 fn replace(&self, from: &str, to: &str) -> String {
450 let mut result = String::new();
451 let mut last_end = 0;
452 for (start, end) in self.match_indices(from) {
453 result.push_str(unsafe { self.slice_unchecked(last_end, start) });
457 result.push_str(unsafe { self.slice_unchecked(last_end, self.len()) });
461 /// Returns an iterator over the string in Unicode Normalization Form D
462 /// (canonical decomposition).
464 #[unstable = "this functionality may be moved to libunicode"]
465 fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
467 iter: self[].chars(),
474 /// Returns an iterator over the string in Unicode Normalization Form KD
475 /// (compatibility decomposition).
477 #[unstable = "this functionality may be moved to libunicode"]
478 fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
480 iter: self[].chars(),
487 /// An Iterator over the string in Unicode Normalization Form C
488 /// (canonical decomposition followed by canonical composition).
490 #[unstable = "this functionality may be moved to libunicode"]
491 fn nfc_chars<'a>(&'a self) -> Recompositions<'a> {
493 iter: self.nfd_chars(),
495 buffer: RingBuf::new(),
501 /// An Iterator over the string in Unicode Normalization Form KC
502 /// (compatibility decomposition followed by canonical composition).
504 #[unstable = "this functionality may be moved to libunicode"]
505 fn nfkc_chars<'a>(&'a self) -> Recompositions<'a> {
507 iter: self.nfkd_chars(),
509 buffer: RingBuf::new(),
515 /// Returns true if a string contains a string pattern.
519 /// - pat - The string pattern to look for
524 /// assert!("bananas".contains("nana"));
527 fn contains(&self, pat: &str) -> bool {
528 core_str::StrExt::contains(self[], pat)
531 /// Returns true if a string contains a char pattern.
535 /// - pat - The char pattern to look for
540 /// assert!("hello".contains_char('e'));
542 #[unstable = "might get removed in favour of a more generic contains()"]
543 fn contains_char<P: CharEq>(&self, pat: P) -> bool {
544 core_str::StrExt::contains_char(self[], pat)
547 /// An iterator over the characters of `self`. Note, this iterates
548 /// over Unicode code-points, not Unicode graphemes.
553 /// let v: Vec<char> = "abc åäö".chars().collect();
554 /// assert_eq!(v, vec!['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
557 fn chars(&self) -> Chars {
558 core_str::StrExt::chars(self[])
561 /// An iterator over the bytes of `self`
566 /// let v: Vec<u8> = "bors".bytes().collect();
567 /// assert_eq!(v, b"bors".to_vec());
570 fn bytes(&self) -> Bytes {
571 core_str::StrExt::bytes(self[])
574 /// An iterator over the characters of `self` and their byte offsets.
576 fn char_indices(&self) -> CharIndices {
577 core_str::StrExt::char_indices(self[])
580 /// An iterator over substrings of `self`, separated by characters
581 /// matched by the pattern `pat`.
586 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect();
587 /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
589 /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).collect();
590 /// assert_eq!(v, vec!["abc", "def", "ghi"]);
592 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
593 /// assert_eq!(v, vec!["lion", "", "tiger", "leopard"]);
595 /// let v: Vec<&str> = "".split('X').collect();
596 /// assert_eq!(v, vec![""]);
599 fn split<P: CharEq>(&self, pat: P) -> Split<P> {
600 core_str::StrExt::split(self[], pat)
603 /// An iterator over substrings of `self`, separated by characters
604 /// matched by the pattern `pat`, restricted to splitting at most `count`
610 /// let v: Vec<&str> = "Mary had a little lambda".splitn(2, ' ').collect();
611 /// assert_eq!(v, vec!["Mary", "had", "a little lambda"]);
613 /// let v: Vec<&str> = "abc1def2ghi".splitn(1, |&: c: char| c.is_numeric()).collect();
614 /// assert_eq!(v, vec!["abc", "def2ghi"]);
616 /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(2, 'X').collect();
617 /// assert_eq!(v, vec!["lion", "", "tigerXleopard"]);
619 /// let v: Vec<&str> = "abcXdef".splitn(0, 'X').collect();
620 /// assert_eq!(v, vec!["abcXdef"]);
622 /// let v: Vec<&str> = "".splitn(1, 'X').collect();
623 /// assert_eq!(v, vec![""]);
626 fn splitn<P: CharEq>(&self, count: uint, pat: P) -> SplitN<P> {
627 core_str::StrExt::splitn(self[], count, pat)
630 /// An iterator over substrings of `self`, separated by characters
631 /// matched by the pattern `pat`.
633 /// Equivalent to `split`, except that the trailing substring
634 /// is skipped if empty (terminator semantics).
639 /// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
640 /// assert_eq!(v, vec!["A", "B"]);
642 /// let v: Vec<&str> = "A..B..".split_terminator('.').collect();
643 /// assert_eq!(v, vec!["A", "", "B", ""]);
645 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').rev().collect();
646 /// assert_eq!(v, vec!["lamb", "little", "a", "had", "Mary"]);
648 /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).rev().collect();
649 /// assert_eq!(v, vec!["ghi", "def", "abc"]);
651 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').rev().collect();
652 /// assert_eq!(v, vec!["leopard", "tiger", "", "lion"]);
654 #[unstable = "might get removed"]
655 fn split_terminator<P: CharEq>(&self, pat: P) -> SplitTerminator<P> {
656 core_str::StrExt::split_terminator(self[], pat)
659 /// An iterator over substrings of `self`, separated by characters
660 /// matched by the pattern `pat`, starting from the end of the string.
661 /// Restricted to splitting at most `count` times.
666 /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(2, ' ').collect();
667 /// assert_eq!(v, vec!["lamb", "little", "Mary had a"]);
669 /// let v: Vec<&str> = "abc1def2ghi".rsplitn(1, |&: c: char| c.is_numeric()).collect();
670 /// assert_eq!(v, vec!["ghi", "abc1def"]);
672 /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(2, 'X').collect();
673 /// assert_eq!(v, vec!["leopard", "tiger", "lionX"]);
676 fn rsplitn<P: CharEq>(&self, count: uint, pat: P) -> RSplitN<P> {
677 core_str::StrExt::rsplitn(self[], count, pat)
680 /// An iterator over the start and end indices of the disjoint
681 /// matches of the pattern `pat` within `self`.
683 /// That is, each returned value `(start, end)` satisfies
684 /// `self.slice(start, end) == pat`. For matches of `pat` within
685 /// `self` that overlap, only the indices corresponding to the
686 /// first match are returned.
691 /// let v: Vec<(uint, uint)> = "abcXXXabcYYYabc".match_indices("abc").collect();
692 /// assert_eq!(v, vec![(0,3), (6,9), (12,15)]);
694 /// let v: Vec<(uint, uint)> = "1abcabc2".match_indices("abc").collect();
695 /// assert_eq!(v, vec![(1,4), (4,7)]);
697 /// let v: Vec<(uint, uint)> = "ababa".match_indices("aba").collect();
698 /// assert_eq!(v, vec![(0, 3)]); // only the first `aba`
700 #[unstable = "might have its iterator type changed"]
701 fn match_indices<'a>(&'a self, pat: &'a str) -> MatchIndices<'a> {
702 core_str::StrExt::match_indices(self[], pat)
705 /// An iterator over the substrings of `self` separated by the pattern `pat`.
710 /// let v: Vec<&str> = "abcXXXabcYYYabc".split_str("abc").collect();
711 /// assert_eq!(v, vec!["", "XXX", "YYY", ""]);
713 /// let v: Vec<&str> = "1abcabc2".split_str("abc").collect();
714 /// assert_eq!(v, vec!["1", "", "2"]);
716 #[unstable = "might get removed in the future in favor of a more generic split()"]
717 fn split_str<'a>(&'a self, pat: &'a str) -> SplitStr<'a> {
718 core_str::StrExt::split_str(self[], pat)
721 /// An iterator over the lines of a string (subsequences separated
722 /// by `\n`). This does not include the empty string after a
728 /// let four_lines = "foo\nbar\n\nbaz\n";
729 /// let v: Vec<&str> = four_lines.lines().collect();
730 /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
733 fn lines(&self) -> Lines {
734 core_str::StrExt::lines(self[])
737 /// An iterator over the lines of a string, separated by either
738 /// `\n` or `\r\n`. As with `.lines()`, this does not include an
739 /// empty trailing line.
744 /// let four_lines = "foo\r\nbar\n\r\nbaz\n";
745 /// let v: Vec<&str> = four_lines.lines_any().collect();
746 /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
749 fn lines_any(&self) -> LinesAny {
750 core_str::StrExt::lines_any(self[])
753 /// Returns a slice of the given string from the byte range
754 /// [`begin`..`end`).
756 /// This operation is `O(1)`.
758 /// Panics when `begin` and `end` do not point to valid characters
759 /// or point beyond the last character of the string.
761 /// See also `slice_to` and `slice_from` for slicing prefixes and
762 /// suffixes of strings, and `slice_chars` for slicing based on
763 /// code point counts.
768 /// let s = "Löwe 老虎 Léopard";
769 /// assert_eq!(s.slice(0, 1), "L");
771 /// assert_eq!(s.slice(1, 9), "öwe 老");
773 /// // these will panic:
774 /// // byte 2 lies within `ö`:
775 /// // s.slice(2, 3);
777 /// // byte 8 lies within `老`
778 /// // s.slice(1, 8);
780 /// // byte 100 is outside the string
781 /// // s.slice(3, 100);
783 #[unstable = "use slice notation [a..b] instead"]
784 fn slice(&self, begin: uint, end: uint) -> &str {
785 core_str::StrExt::slice(self[], begin, end)
788 /// Returns a slice of the string from `begin` to its end.
790 /// Equivalent to `self.slice(begin, self.len())`.
792 /// Panics when `begin` does not point to a valid character, or is
795 /// See also `slice`, `slice_to` and `slice_chars`.
796 #[unstable = "use slice notation [a..] instead"]
797 fn slice_from(&self, begin: uint) -> &str {
798 core_str::StrExt::slice_from(self[], begin)
801 /// Returns a slice of the string from the beginning to byte
804 /// Equivalent to `self.slice(0, end)`.
806 /// Panics when `end` does not point to a valid character, or is
809 /// See also `slice`, `slice_from` and `slice_chars`.
810 #[unstable = "use slice notation [0..a] instead"]
811 fn slice_to(&self, end: uint) -> &str {
812 core_str::StrExt::slice_to(self[], end)
815 /// Returns a slice of the string from the character range
816 /// [`begin`..`end`).
818 /// That is, start at the `begin`-th code point of the string and
819 /// continue to the `end`-th code point. This does not detect or
820 /// handle edge cases such as leaving a combining character as the
821 /// first code point of the string.
823 /// Due to the design of UTF-8, this operation is `O(end)`.
824 /// See `slice`, `slice_to` and `slice_from` for `O(1)`
825 /// variants that use byte indices rather than code point
828 /// Panics if `begin` > `end` or if either `begin` or `end` is
829 /// beyond the last character of the string.
834 /// let s = "Löwe 老虎 Léopard";
835 /// assert_eq!(s.slice_chars(0, 4), "Löwe");
836 /// assert_eq!(s.slice_chars(5, 7), "老虎");
838 #[unstable = "may have yet to prove its worth"]
839 fn slice_chars(&self, begin: uint, end: uint) -> &str {
840 core_str::StrExt::slice_chars(self[], begin, end)
843 /// Takes a bytewise (not UTF-8) slice from a string.
845 /// Returns the substring from [`begin`..`end`).
847 /// Caller must check both UTF-8 character boundaries and the boundaries of
848 /// the entire slice as well.
850 unsafe fn slice_unchecked(&self, begin: uint, end: uint) -> &str {
851 core_str::StrExt::slice_unchecked(self[], begin, end)
854 /// Returns true if the pattern `pat` is a prefix of the string.
859 /// assert!("banana".starts_with("ba"));
862 fn starts_with(&self, pat: &str) -> bool {
863 core_str::StrExt::starts_with(self[], pat)
866 /// Returns true if the pattern `pat` is a suffix of the string.
871 /// assert!("banana".ends_with("nana"));
874 fn ends_with(&self, pat: &str) -> bool {
875 core_str::StrExt::ends_with(self[], pat)
878 /// Returns a string with all pre- and suffixes that match
879 /// the pattern `pat` repeatedly removed.
883 /// * pat - a string pattern
888 /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
889 /// let x: &[_] = &['1', '2'];
890 /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar");
891 /// assert_eq!("123foo1bar123".trim_matches(|&: c: char| c.is_numeric()), "foo1bar");
894 fn trim_matches<P: CharEq>(&self, pat: P) -> &str {
895 core_str::StrExt::trim_matches(self[], pat)
898 /// Returns a string with all prefixes that match
899 /// the pattern `pat` repeatedly removed.
903 /// * pat - a string pattern
908 /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
909 /// let x: &[_] = &['1', '2'];
910 /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12");
911 /// assert_eq!("123foo1bar123".trim_left_matches(|&: c: char| c.is_numeric()), "foo1bar123");
914 fn trim_left_matches<P: CharEq>(&self, pat: P) -> &str {
915 core_str::StrExt::trim_left_matches(self[], pat)
918 /// Returns a string with all suffixes that match
919 /// the pattern `pat` repeatedly removed.
923 /// * pat - a string pattern
928 /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
929 /// let x: &[_] = &['1', '2'];
930 /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar");
931 /// assert_eq!("123foo1bar123".trim_right_matches(|&: c: char| c.is_numeric()), "123foo1bar");
934 fn trim_right_matches<P: CharEq>(&self, pat: P) -> &str {
935 core_str::StrExt::trim_right_matches(self[], pat)
938 /// Check that `index`-th byte lies at the start and/or end of a
939 /// UTF-8 code point sequence.
941 /// The start and end of the string (when `index == self.len()`)
942 /// are considered to be boundaries.
944 /// Panics if `index` is greater than `self.len()`.
949 /// let s = "Löwe 老虎 Léopard";
950 /// assert!(s.is_char_boundary(0));
952 /// assert!(s.is_char_boundary(6));
953 /// assert!(s.is_char_boundary(s.len()));
955 /// // second byte of `ö`
956 /// assert!(!s.is_char_boundary(2));
958 /// // third byte of `老`
959 /// assert!(!s.is_char_boundary(8));
961 #[unstable = "naming is uncertain with container conventions"]
962 fn is_char_boundary(&self, index: uint) -> bool {
963 core_str::StrExt::is_char_boundary(self[], index)
966 /// Pluck a character out of a string and return the index of the next
969 /// This function can be used to iterate over the Unicode characters of a
974 /// This example manually iterates through the characters of a
975 /// string; this should normally be done by `.chars()` or
979 /// use std::str::CharRange;
981 /// let s = "中华Việt Nam";
983 /// while i < s.len() {
984 /// let CharRange {ch, next} = s.char_range_at(i);
985 /// println!("{}: {}", i, ch);
1007 /// * s - The string
1008 /// * start - The byte offset of the char to extract
1012 /// A record {ch: char, next: uint} containing the char value and the byte
1013 /// index of the next Unicode character.
1017 /// If `start` is greater than or equal to the length of the string.
1018 /// If `start` is not the index of the beginning of a valid UTF-8 character.
1019 #[unstable = "naming is uncertain with container conventions"]
1020 fn char_range_at(&self, start: uint) -> CharRange {
1021 core_str::StrExt::char_range_at(self[], start)
1024 /// Given a byte position and a str, return the previous char and its position.
1026 /// This function can be used to iterate over a Unicode string in reverse.
1028 /// Returns 0 for next index if called on start index 0.
1032 /// If `start` is greater than the length of the string.
1033 /// If `start` is not an index following a valid UTF-8 character.
1034 #[unstable = "naming is uncertain with container conventions"]
1035 fn char_range_at_reverse(&self, start: uint) -> CharRange {
1036 core_str::StrExt::char_range_at_reverse(self[], start)
1039 /// Plucks the character starting at the `i`th byte of a string.
1045 /// assert_eq!(s.char_at(1), 'b');
1046 /// assert_eq!(s.char_at(2), 'π');
1047 /// assert_eq!(s.char_at(4), 'c');
1052 /// If `i` is greater than or equal to the length of the string.
1053 /// If `i` is not the index of the beginning of a valid UTF-8 character.
1054 #[unstable = "naming is uncertain with container conventions"]
1055 fn char_at(&self, i: uint) -> char {
1056 core_str::StrExt::char_at(self[], i)
1059 /// Plucks the character ending at the `i`th byte of a string.
1063 /// If `i` is greater than the length of the string.
1064 /// If `i` is not an index following a valid UTF-8 character.
1065 #[unstable = "naming is uncertain with container conventions"]
1066 fn char_at_reverse(&self, i: uint) -> char {
1067 core_str::StrExt::char_at_reverse(self[], i)
1070 /// Work with the byte buffer of a string as a byte slice.
1075 /// assert_eq!("bors".as_bytes(), b"bors");
1078 fn as_bytes(&self) -> &[u8] {
1079 core_str::StrExt::as_bytes(self[])
1082 /// Returns the byte index of the first character of `self` that
1083 /// matches the pattern `pat`.
1087 /// `Some` containing the byte index of the first matching character
1088 /// or `None` if there is no match.
1093 /// let s = "Löwe 老虎 Léopard";
1095 /// assert_eq!(s.find('L'), Some(0));
1096 /// assert_eq!(s.find('é'), Some(14));
1098 /// // the first space
1099 /// assert_eq!(s.find(|&: c: char| c.is_whitespace()), Some(5));
1101 /// // neither are found
1102 /// let x: &[_] = &['1', '2'];
1103 /// assert_eq!(s.find(x), None);
1106 fn find<P: CharEq>(&self, pat: P) -> Option<uint> {
1107 core_str::StrExt::find(self[], pat)
1110 /// Returns the byte index of the last character of `self` that
1111 /// matches the pattern `pat`.
1115 /// `Some` containing the byte index of the last matching character
1116 /// or `None` if there is no match.
1121 /// let s = "Löwe 老虎 Léopard";
1123 /// assert_eq!(s.rfind('L'), Some(13));
1124 /// assert_eq!(s.rfind('é'), Some(14));
1126 /// // the second space
1127 /// assert_eq!(s.rfind(|&: c: char| c.is_whitespace()), Some(12));
1129 /// // searches for an occurrence of either `1` or `2`, but neither are found
1130 /// let x: &[_] = &['1', '2'];
1131 /// assert_eq!(s.rfind(x), None);
1134 fn rfind<P: CharEq>(&self, pat: P) -> Option<uint> {
1135 core_str::StrExt::rfind(self[], pat)
1138 /// Returns the byte index of the first matching substring
1142 /// * `needle` - The string to search for
1146 /// `Some` containing the byte index of the first matching substring
1147 /// or `None` if there is no match.
1152 /// let s = "Löwe 老虎 Léopard";
1154 /// assert_eq!(s.find_str("老虎 L"), Some(6));
1155 /// assert_eq!(s.find_str("muffin man"), None);
1157 #[unstable = "might get removed in favor of a more generic find in the future"]
1158 fn find_str(&self, needle: &str) -> Option<uint> {
1159 core_str::StrExt::find_str(self[], needle)
1162 /// Retrieves the first character from a string slice and returns
1163 /// it. This does not allocate a new string; instead, it returns a
1164 /// slice that points one character beyond the character that was
1165 /// shifted. If the string does not contain any characters,
1166 /// None is returned instead.
1171 /// let s = "Löwe 老虎 Léopard";
1172 /// let (c, s1) = s.slice_shift_char().unwrap();
1173 /// assert_eq!(c, 'L');
1174 /// assert_eq!(s1, "öwe 老虎 Léopard");
1176 /// let (c, s2) = s1.slice_shift_char().unwrap();
1177 /// assert_eq!(c, 'ö');
1178 /// assert_eq!(s2, "we 老虎 Léopard");
1180 #[unstable = "awaiting conventions about shifting and slices"]
1181 fn slice_shift_char(&self) -> Option<(char, &str)> {
1182 core_str::StrExt::slice_shift_char(self[])
1185 /// Returns the byte offset of an inner slice relative to an enclosing outer slice.
1187 /// Panics if `inner` is not a direct slice contained within self.
1192 /// let string = "a\nb\nc";
1193 /// let lines: Vec<&str> = string.lines().collect();
1195 /// assert!(string.subslice_offset(lines[0]) == 0); // &"a"
1196 /// assert!(string.subslice_offset(lines[1]) == 2); // &"b"
1197 /// assert!(string.subslice_offset(lines[2]) == 4); // &"c"
1199 #[unstable = "awaiting convention about comparability of arbitrary slices"]
1200 fn subslice_offset(&self, inner: &str) -> uint {
1201 core_str::StrExt::subslice_offset(self[], inner)
1204 /// Return an unsafe pointer to the string's buffer.
1206 /// The caller must ensure that the string outlives this pointer,
1207 /// and that it is not reallocated (e.g. by pushing to the
1211 fn as_ptr(&self) -> *const u8 {
1212 core_str::StrExt::as_ptr(self[])
1215 /// Return an iterator of `u16` over the string encoded as UTF-16.
1216 #[unstable = "this functionality may only be provided by libunicode"]
1217 fn utf16_units(&self) -> Utf16Units {
1218 Utf16Units { encoder: Utf16Encoder::new(self[].chars()) }
1221 /// Return the number of bytes in this string
1226 /// assert_eq!("foo".len(), 3);
1227 /// assert_eq!("ƒoo".len(), 4);
1231 fn len(&self) -> uint {
1232 core_str::StrExt::len(self[])
1235 /// Returns true if this slice contains no bytes
1240 /// assert!("".is_empty());
1244 fn is_empty(&self) -> bool {
1245 core_str::StrExt::is_empty(self[])
1248 /// Parse this string into the specified type.
1253 /// assert_eq!("4".parse::<u32>(), Some(4));
1254 /// assert_eq!("j".parse::<u32>(), None);
1257 #[unstable = "this method was just created"]
1258 fn parse<F: FromStr>(&self) -> Option<F> {
1259 core_str::StrExt::parse(self[])
1262 /// Returns an iterator over the
1263 /// [grapheme clusters](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
1266 /// If `is_extended` is true, the iterator is over the *extended grapheme clusters*;
1267 /// otherwise, the iterator is over the *legacy grapheme clusters*.
1268 /// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
1269 /// recommends extended grapheme cluster boundaries for general processing.
1274 /// let gr1 = "a\u{310}e\u{301}o\u{308}\u{332}".graphemes(true).collect::<Vec<&str>>();
1275 /// let b: &[_] = &["a\u{310}", "e\u{301}", "o\u{308}\u{332}"];
1276 /// assert_eq!(gr1.as_slice(), b);
1277 /// let gr2 = "a\r\nb🇷🇺🇸🇹".graphemes(true).collect::<Vec<&str>>();
1278 /// let b: &[_] = &["a", "\r\n", "b", "🇷🇺🇸🇹"];
1279 /// assert_eq!(gr2.as_slice(), b);
1281 #[unstable = "this functionality may only be provided by libunicode"]
1282 fn graphemes(&self, is_extended: bool) -> Graphemes {
1283 UnicodeStr::graphemes(self[], is_extended)
1286 /// Returns an iterator over the grapheme clusters of self and their byte offsets.
1287 /// See `graphemes()` method for more information.
1292 /// let gr_inds = "a̐éö̲\r\n".grapheme_indices(true).collect::<Vec<(uint, &str)>>();
1293 /// let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
1294 /// assert_eq!(gr_inds.as_slice(), b);
1296 #[unstable = "this functionality may only be provided by libunicode"]
1297 fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices {
1298 UnicodeStr::grapheme_indices(self[], is_extended)
1301 /// An iterator over the words of a string (subsequences separated
1302 /// by any sequence of whitespace). Sequences of whitespace are
1303 /// collapsed, so empty "words" are not included.
1308 /// let some_words = " Mary had\ta little \n\t lamb";
1309 /// let v: Vec<&str> = some_words.words().collect();
1310 /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
1313 fn words(&self) -> Words {
1314 UnicodeStr::words(self[])
1317 /// Returns a string's displayed width in columns, treating control
1318 /// characters as zero-width.
1320 /// `is_cjk` determines behavior for characters in the Ambiguous category:
1321 /// if `is_cjk` is `true`, these are 2 columns wide; otherwise, they are 1.
1322 /// In CJK locales, `is_cjk` should be `true`, else it should be `false`.
1323 /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
1324 /// recommends that these characters be treated as 1 column (i.e.,
1325 /// `is_cjk` = `false`) if the locale is unknown.
1326 #[unstable = "this functionality may only be provided by libunicode"]
1327 fn width(&self, is_cjk: bool) -> uint {
1328 UnicodeStr::width(self[], is_cjk)
1331 /// Returns a string with leading and trailing whitespace removed.
1333 fn trim(&self) -> &str {
1334 UnicodeStr::trim(self[])
1337 /// Returns a string with leading whitespace removed.
1339 fn trim_left(&self) -> &str {
1340 UnicodeStr::trim_left(self[])
1343 /// Returns a string with trailing whitespace removed.
1345 fn trim_right(&self) -> &str {
1346 UnicodeStr::trim_right(self[])
// `str` takes every `StrExt` method from the trait's default bodies; the impl
// itself overrides nothing.
impl StrExt for str {}
1357 use core::iter::AdditiveIterator;
1358 use super::from_utf8;
1359 use super::Utf8Error;
1364 assert!("" <= "foo");
1365 assert!("foo" <= "foo");
1366 assert!("foo" != "bar");
1371 assert_eq!("".len(), 0u);
1372 assert_eq!("hello world".len(), 11u);
1373 assert_eq!("\x63".len(), 1u);
1374 assert_eq!("\u{a2}".len(), 2u);
1375 assert_eq!("\u{3c0}".len(), 2u);
1376 assert_eq!("\u{2620}".len(), 3u);
1377 assert_eq!("\u{1d11e}".len(), 4u);
1379 assert_eq!("".chars().count(), 0u);
1380 assert_eq!("hello world".chars().count(), 11u);
1381 assert_eq!("\x63".chars().count(), 1u);
1382 assert_eq!("\u{a2}".chars().count(), 1u);
1383 assert_eq!("\u{3c0}".chars().count(), 1u);
1384 assert_eq!("\u{2620}".chars().count(), 1u);
1385 assert_eq!("\u{1d11e}".chars().count(), 1u);
1386 assert_eq!("ประเทศไทย中华Việt Nam".chars().count(), 19u);
1388 assert_eq!("hello".width(false), 10u);
1389 assert_eq!("hello".width(true), 10u);
1390 assert_eq!("\0\0\0\0\0".width(false), 0u);
1391 assert_eq!("\0\0\0\0\0".width(true), 0u);
1392 assert_eq!("".width(false), 0u);
1393 assert_eq!("".width(true), 0u);
1394 assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(false), 4u);
1395 assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(true), 8u);
1400 assert_eq!("hello".find('l'), Some(2u));
1401 assert_eq!("hello".find(|&: c:char| c == 'o'), Some(4u));
1402 assert!("hello".find('x').is_none());
1403 assert!("hello".find(|&: c:char| c == 'x').is_none());
1404 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
1405 assert_eq!("ประเทศไทย中华Việt Nam".find(|&: c: char| c == '华'), Some(30u));
1410 assert_eq!("hello".rfind('l'), Some(3u));
1411 assert_eq!("hello".rfind(|&: c:char| c == 'o'), Some(4u));
1412 assert!("hello".rfind('x').is_none());
1413 assert!("hello".rfind(|&: c:char| c == 'x').is_none());
1414 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
1415 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|&: c: char| c == '华'), Some(30u));
1420 let empty = String::from_str("");
1421 let s: String = empty.chars().collect();
1422 assert_eq!(empty, s);
1423 let data = String::from_str("ประเทศไทย中");
1424 let s: String = data.chars().collect();
1425 assert_eq!(data, s);
1429 fn test_into_bytes() {
1430 let data = String::from_str("asdf");
1431 let buf = data.into_bytes();
1432 assert_eq!(b"asdf", buf);
1436 fn test_find_str() {
1438 assert_eq!("".find_str(""), Some(0u));
1439 assert!("banana".find_str("apple pie").is_none());
1441 let data = "abcabc";
1442 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
1443 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
1444 assert!(data.slice(2u, 4u).find_str("ab").is_none());
1446 let string = "ประเทศไทย中华Việt Nam";
1447 let mut data = String::from_str(string);
1448 data.push_str(string);
1449 assert!(data.find_str("ไท华").is_none());
1450 assert_eq!(data.slice(0u, 43u).find_str(""), Some(0u));
1451 assert_eq!(data.slice(6u, 43u).find_str(""), Some(6u - 6u));
1453 assert_eq!(data.slice(0u, 43u).find_str("ประ"), Some( 0u));
1454 assert_eq!(data.slice(0u, 43u).find_str("ทศไ"), Some(12u));
1455 assert_eq!(data.slice(0u, 43u).find_str("ย中"), Some(24u));
1456 assert_eq!(data.slice(0u, 43u).find_str("iệt"), Some(34u));
1457 assert_eq!(data.slice(0u, 43u).find_str("Nam"), Some(40u));
1459 assert_eq!(data.slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
1460 assert_eq!(data.slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
1461 assert_eq!(data.slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
1462 assert_eq!(data.slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
1463 assert_eq!(data.slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
1467 fn test_slice_chars() {
1468 fn t(a: &str, b: &str, start: uint) {
1469 assert_eq!(a.slice_chars(start, start + b.chars().count()), b);
1472 t("hello", "llo", 2);
1473 t("hello", "el", 1);
1476 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
/// Test helper: builds an owned `String` from a borrowed slice.
fn s(x: &str) -> String {
    let owned = x.to_string();
    owned
}
1481 macro_rules! test_concat {
1482 ($expected: expr, $string: expr) => {
1484 let s: String = $string.concat();
1485 assert_eq!($expected, s);
1491 fn test_concat_for_different_types() {
1492 test_concat!("ab", vec![s("a"), s("b")]);
1493 test_concat!("ab", vec!["a", "b"]);
1494 test_concat!("ab", vec!["a", "b"].as_slice());
1495 test_concat!("ab", vec![s("a"), s("b")]);
1499 fn test_concat_for_different_lengths() {
1500 let empty: &[&str] = &[];
1501 test_concat!("", empty);
1502 test_concat!("a", ["a"]);
1503 test_concat!("ab", ["a", "b"]);
1504 test_concat!("abc", ["", "a", "bc"]);
1507 macro_rules! test_connect {
1508 ($expected: expr, $string: expr, $delim: expr) => {
1510 let s = $string.connect($delim);
1511 assert_eq!($expected, s);
1517 fn test_connect_for_different_types() {
1518 test_connect!("a-b", ["a", "b"], "-");
1519 let hyphen = "-".to_string();
1520 test_connect!("a-b", [s("a"), s("b")], hyphen.as_slice());
1521 test_connect!("a-b", vec!["a", "b"], hyphen.as_slice());
1522 test_connect!("a-b", vec!["a", "b"].as_slice(), "-");
1523 test_connect!("a-b", vec![s("a"), s("b")], "-");
1527 fn test_connect_for_different_lengths() {
1528 let empty: &[&str] = &[];
1529 test_connect!("", empty, "-");
1530 test_connect!("a", ["a"], "-");
1531 test_connect!("a-b", ["a", "b"], "-");
1532 test_connect!("-a-bc", ["", "a", "bc"], "-");
1536 fn test_unsafe_slice() {
1537 assert_eq!("ab", unsafe {"abc".slice_unchecked(0, 2)});
1538 assert_eq!("bc", unsafe {"abc".slice_unchecked(1, 3)});
1539 assert_eq!("", unsafe {"abc".slice_unchecked(1, 1)});
1540 fn a_million_letter_a() -> String {
1542 let mut rs = String::new();
1544 rs.push_str("aaaaaaaaaa");
1549 fn half_a_million_letter_a() -> String {
1551 let mut rs = String::new();
1553 rs.push_str("aaaaa");
1558 let letters = a_million_letter_a();
1559 assert!(half_a_million_letter_a() ==
1560 unsafe {String::from_str(letters.slice_unchecked(
fn test_starts_with() {
    // (text, candidate prefix, expected answer)
    let cases = [
        ("", "", true),
        ("abc", "", true),
        ("abc", "a", true),
        ("a", "abc", false),
        ("", "abc", false),
        ("ödd", "-", false),
        ("ödd", "öd", true),
    ];
    for &(text, prefix, expected) in cases.iter() {
        assert_eq!(text.starts_with(prefix), expected);
    }
}
fn test_ends_with() {
    // (text, candidate suffix, expected answer)
    let cases = [
        ("", "", true),
        ("abc", "", true),
        ("abc", "c", true),
        ("a", "abc", false),
        ("", "abc", false),
        ("ddö", "-", false),
        ("ddö", "dö", true),
    ];
    for &(text, suffix, expected) in cases.iter() {
        assert_eq!(text.ends_with(suffix), expected);
    }
}
fn test_is_empty() {
    let blank = "";
    let single = "a";
    assert_eq!(blank.is_empty(), true);
    assert_eq!(single.is_empty(), false);
}
1596 assert_eq!("".replace(a, "b"), String::from_str(""));
1597 assert_eq!("a".replace(a, "b"), String::from_str("b"));
1598 assert_eq!("ab".replace(a, "b"), String::from_str("bb"));
1600 assert!(" test test ".replace(test, "toast") ==
1601 String::from_str(" toast toast "));
1602 assert_eq!(" test test ".replace(test, ""), String::from_str(" "));
1606 fn test_replace_2a() {
1607 let data = "ประเทศไทย中华";
1608 let repl = "دولة الكويت";
1611 let a2 = "دولة الكويتทศไทย中华";
1612 assert_eq!(data.replace(a, repl), a2);
1616 fn test_replace_2b() {
1617 let data = "ประเทศไทย中华";
1618 let repl = "دولة الكويت";
1621 let b2 = "ปรدولة الكويتทศไทย中华";
1622 assert_eq!(data.replace(b, repl), b2);
1626 fn test_replace_2c() {
1627 let data = "ประเทศไทย中华";
1628 let repl = "دولة الكويت";
1631 let c2 = "ประเทศไทยدولة الكويت";
1632 assert_eq!(data.replace(c, repl), c2);
1636 fn test_replace_2d() {
1637 let data = "ประเทศไทย中华";
1638 let repl = "دولة الكويت";
1641 assert_eq!(data.replace(d, repl), data);
1646 assert_eq!("ab", "abc".slice(0, 2));
1647 assert_eq!("bc", "abc".slice(1, 3));
1648 assert_eq!("", "abc".slice(1, 1));
1649 assert_eq!("\u{65e5}", "\u{65e5}\u{672c}".slice(0, 3));
1651 let data = "ประเทศไทย中华";
1652 assert_eq!("ป", data.slice(0, 3));
1653 assert_eq!("ร", data.slice(3, 6));
1654 assert_eq!("", data.slice(3, 3));
1655 assert_eq!("华", data.slice(30, 33));
1657 fn a_million_letter_x() -> String {
1659 let mut rs = String::new();
1661 rs.push_str("华华华华华华华华华华");
1666 fn half_a_million_letter_x() -> String {
1668 let mut rs = String::new();
1670 rs.push_str("华华华华华");
1675 let letters = a_million_letter_x();
1676 assert!(half_a_million_letter_x() ==
1677 String::from_str(letters.slice(0u, 3u * 500000u)));
1682 let ss = "中华Việt Nam";
1684 assert_eq!("华", ss.slice(3u, 6u));
1685 assert_eq!("Việt Nam", ss.slice(6u, 16u));
1687 assert_eq!("ab", "abc".slice(0u, 2u));
1688 assert_eq!("bc", "abc".slice(1u, 3u));
1689 assert_eq!("", "abc".slice(1u, 1u));
1691 assert_eq!("中", ss.slice(0u, 3u));
1692 assert_eq!("华V", ss.slice(3u, 7u));
1693 assert_eq!("", ss.slice(3u, 3u));
1708 fn test_slice_fail() {
1709 "中华Việt Nam".slice(0u, 2u);
1713 fn test_slice_from() {
1714 assert_eq!("abcd".slice_from(0), "abcd");
1715 assert_eq!("abcd".slice_from(2), "cd");
1716 assert_eq!("abcd".slice_from(4), "");
1719 fn test_slice_to() {
1720 assert_eq!("abcd".slice_to(0), "");
1721 assert_eq!("abcd".slice_to(2), "ab");
1722 assert_eq!("abcd".slice_to(4), "abcd");
1726 fn test_trim_left_matches() {
1727 let v: &[char] = &[];
1728 assert_eq!(" *** foo *** ".trim_left_matches(v), " *** foo *** ");
1729 let chars: &[char] = &['*', ' '];
1730 assert_eq!(" *** foo *** ".trim_left_matches(chars), "foo *** ");
1731 assert_eq!(" *** *** ".trim_left_matches(chars), "");
1732 assert_eq!("foo *** ".trim_left_matches(chars), "foo *** ");
1734 assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
1735 let chars: &[char] = &['1', '2'];
1736 assert_eq!("12foo1bar12".trim_left_matches(chars), "foo1bar12");
1737 assert_eq!("123foo1bar123".trim_left_matches(|&: c: char| c.is_numeric()), "foo1bar123");
1741 fn test_trim_right_matches() {
1742 let v: &[char] = &[];
1743 assert_eq!(" *** foo *** ".trim_right_matches(v), " *** foo *** ");
1744 let chars: &[char] = &['*', ' '];
1745 assert_eq!(" *** foo *** ".trim_right_matches(chars), " *** foo");
1746 assert_eq!(" *** *** ".trim_right_matches(chars), "");
1747 assert_eq!(" *** foo".trim_right_matches(chars), " *** foo");
1749 assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
1750 let chars: &[char] = &['1', '2'];
1751 assert_eq!("12foo1bar12".trim_right_matches(chars), "12foo1bar");
1752 assert_eq!("123foo1bar123".trim_right_matches(|&: c: char| c.is_numeric()), "123foo1bar");
1756 fn test_trim_matches() {
1757 let v: &[char] = &[];
1758 assert_eq!(" *** foo *** ".trim_matches(v), " *** foo *** ");
1759 let chars: &[char] = &['*', ' '];
1760 assert_eq!(" *** foo *** ".trim_matches(chars), "foo");
1761 assert_eq!(" *** *** ".trim_matches(chars), "");
1762 assert_eq!("foo".trim_matches(chars), "foo");
1764 assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
1765 let chars: &[char] = &['1', '2'];
1766 assert_eq!("12foo1bar12".trim_matches(chars), "foo1bar");
1767 assert_eq!("123foo1bar123".trim_matches(|&: c: char| c.is_numeric()), "foo1bar");
fn test_trim_left() {
    // (input, input with leading whitespace removed)
    let cases = [
        ("", ""),
        ("a", "a"),
        (" ", ""),
        (" blah", "blah"),
        (" \u{3000} wut", "wut"),
        ("hey ", "hey "),
    ];
    for &(input, trimmed) in cases.iter() {
        assert_eq!(input.trim_left(), trimmed);
    }
}
fn test_trim_right() {
    // (input, input with trailing whitespace removed)
    let cases = [
        ("", ""),
        ("a", "a"),
        (" ", ""),
        ("blah ", "blah"),
        ("wut \u{3000} ", "wut"),
        (" hey", " hey"),
    ];
    for &(input, trimmed) in cases.iter() {
        assert_eq!(input.trim_right(), trimmed);
    }
}
1792 assert_eq!("".trim(), "");
1793 assert_eq!("a".trim(), "a");
1794 assert_eq!(" ".trim(), "");
1795 assert_eq!(" blah ".trim(), "blah");
1796 assert_eq!("\nwut \u{3000} ".trim(), "wut");
1797 assert_eq!(" hey dude ".trim(), "hey dude");
fn test_is_whitespace() {
    // True when every character of `text` is whitespace (vacuously so for "").
    fn all_whitespace(text: &str) -> bool {
        text.chars().all(|c| c.is_whitespace())
    }

    assert!(all_whitespace(""));
    assert!(all_whitespace(" "));
    assert!(all_whitespace("\u{2009}")); // Thin space
    assert!(all_whitespace(" \n\t "));
    assert!(!all_whitespace(" _ "));
}
1810 fn test_slice_shift_char() {
1811 let data = "ประเทศไทย中";
1812 assert_eq!(data.slice_shift_char(), Some(('ป', "ระเทศไทย中")));
1816 fn test_slice_shift_char_2() {
1818 assert_eq!(empty.slice_shift_char(), None);
1823 // deny overlong encodings
1824 assert!(from_utf8(&[0xc0, 0x80]).is_err());
1825 assert!(from_utf8(&[0xc0, 0xae]).is_err());
1826 assert!(from_utf8(&[0xe0, 0x80, 0x80]).is_err());
1827 assert!(from_utf8(&[0xe0, 0x80, 0xaf]).is_err());
1828 assert!(from_utf8(&[0xe0, 0x81, 0x81]).is_err());
1829 assert!(from_utf8(&[0xf0, 0x82, 0x82, 0xac]).is_err());
1830 assert!(from_utf8(&[0xf4, 0x90, 0x80, 0x80]).is_err());
1833 assert!(from_utf8(&[0xED, 0xA0, 0x80]).is_err());
1834 assert!(from_utf8(&[0xED, 0xBF, 0xBF]).is_err());
1836 assert!(from_utf8(&[0xC2, 0x80]).is_ok());
1837 assert!(from_utf8(&[0xDF, 0xBF]).is_ok());
1838 assert!(from_utf8(&[0xE0, 0xA0, 0x80]).is_ok());
1839 assert!(from_utf8(&[0xED, 0x9F, 0xBF]).is_ok());
1840 assert!(from_utf8(&[0xEE, 0x80, 0x80]).is_ok());
1841 assert!(from_utf8(&[0xEF, 0xBF, 0xBF]).is_ok());
1842 assert!(from_utf8(&[0xF0, 0x90, 0x80, 0x80]).is_ok());
1843 assert!(from_utf8(&[0xF4, 0x8F, 0xBF, 0xBF]).is_ok());
1847 fn test_is_utf16() {
1848 use unicode::str::is_utf16;
1849 macro_rules! pos ( ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } });
1857 // surrogate pairs (randomly generated with Python 3's
1858 // .encode('utf-16be'))
1859 pos!(&[0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
1860 &[0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
1861 &[0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);
1863 // mixtures (also random)
1864 pos!(&[0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
1865 &[0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
1866 &[0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);
1869 macro_rules! neg ( ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } });
1872 // surrogate + regular unit
1874 // surrogate + lead surrogate
1876 // unterminated surrogate
1878 // trail surrogate without a lead
1881 // random byte sequences that Python 3's .decode('utf-16be')
1883 neg!(&[0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
1884 &[0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
1885 &[0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
1886 &[0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
1887 &[0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
1888 &[0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
1889 &[0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
1890 &[0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
1891 &[0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
1892 &[0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
1893 &[0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
1894 &[0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
1895 &[0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
1896 &[0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
1897 &[0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
1898 &[0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
1899 &[0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
1900 &[0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
1901 &[0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
1902 &[0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
1903 &[0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
1907 fn test_as_bytes() {
1910 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1911 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1915 assert_eq!("".as_bytes(), b);
1916 assert_eq!("abc".as_bytes(), b"abc");
1917 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
1922 fn test_as_bytes_fail() {
1923 // Don't double free. (I'm not sure if this exercises the
1924 // original problem code path anymore.)
1925 let s = String::from_str("");
1926 let _bytes = s.as_bytes();
1932 let buf = "hello".as_ptr();
1934 assert_eq!(*buf.offset(0), b'h');
1935 assert_eq!(*buf.offset(1), b'e');
1936 assert_eq!(*buf.offset(2), b'l');
1937 assert_eq!(*buf.offset(3), b'l');
1938 assert_eq!(*buf.offset(4), b'o');
1943 fn test_subslice_offset() {
1944 let a = "kernelsprite";
1945 let b = a.slice(7, a.len());
1946 let c = a.slice(0, a.len() - 6);
1947 assert_eq!(a.subslice_offset(b), 7);
1948 assert_eq!(a.subslice_offset(c), 0);
1950 let string = "a\nb\nc";
1951 let lines: Vec<&str> = string.lines().collect();
1952 assert_eq!(string.subslice_offset(lines[0]), 0);
1953 assert_eq!(string.subslice_offset(lines[1]), 2);
1954 assert_eq!(string.subslice_offset(lines[2]), 4);
1959 fn test_subslice_offset_2() {
1960 let a = "alchemiter";
1961 let b = "cruxtruder";
1962 a.subslice_offset(b);
1966 fn vec_str_conversions() {
1967 let s1: String = String::from_str("All mimsy were the borogoves");
1969 let v: Vec<u8> = s1.as_bytes().to_vec();
1970 let s2: String = String::from_str(from_utf8(v.as_slice()).unwrap());
1971 let mut i: uint = 0u;
1972 let n1: uint = s1.len();
1973 let n2: uint = v.len();
1976 let a: u8 = s1.as_bytes()[i];
1977 let b: u8 = s2.as_bytes()[i];
1986 fn test_contains() {
1987 assert!("abcde".contains("bcd"));
1988 assert!("abcde".contains("abcd"));
1989 assert!("abcde".contains("bcde"));
1990 assert!("abcde".contains(""));
1991 assert!("".contains(""));
1992 assert!(!"abcde".contains("def"));
1993 assert!(!"".contains("a"));
1995 let data = "ประเทศไทย中华Việt Nam";
1996 assert!(data.contains("ประเ"));
1997 assert!(data.contains("ะเ"));
1998 assert!(data.contains("中华"));
1999 assert!(!data.contains("ไท华"));
2003 fn test_contains_char() {
2004 assert!("abc".contains_char('b'));
2005 assert!("a".contains_char('a'));
2006 assert!(!"abc".contains_char('d'));
2007 assert!(!"".contains_char('a'));
2012 let s = "ศไทย中华Việt Nam";
2013 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2015 for ch in v.iter() {
2016 assert!(s.char_at(pos) == *ch);
2017 pos += ch.to_string().len();
2022 fn test_char_at_reverse() {
2023 let s = "ศไทย中华Việt Nam";
2024 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2025 let mut pos = s.len();
2026 for ch in v.iter().rev() {
2027 assert!(s.char_at_reverse(pos) == *ch);
2028 pos -= ch.to_string().len();
2033 fn test_escape_unicode() {
2034 assert_eq!("abc".escape_unicode(),
2035 String::from_str("\\u{61}\\u{62}\\u{63}"));
2036 assert_eq!("a c".escape_unicode(),
2037 String::from_str("\\u{61}\\u{20}\\u{63}"));
2038 assert_eq!("\r\n\t".escape_unicode(),
2039 String::from_str("\\u{d}\\u{a}\\u{9}"));
2040 assert_eq!("'\"\\".escape_unicode(),
2041 String::from_str("\\u{27}\\u{22}\\u{5c}"));
2042 assert_eq!("\x00\x01\u{fe}\u{ff}".escape_unicode(),
2043 String::from_str("\\u{0}\\u{1}\\u{fe}\\u{ff}"));
2044 assert_eq!("\u{100}\u{ffff}".escape_unicode(),
2045 String::from_str("\\u{100}\\u{ffff}"));
2046 assert_eq!("\u{10000}\u{10ffff}".escape_unicode(),
2047 String::from_str("\\u{10000}\\u{10ffff}"));
2048 assert_eq!("ab\u{fb00}".escape_unicode(),
2049 String::from_str("\\u{61}\\u{62}\\u{fb00}"));
2050 assert_eq!("\u{1d4ea}\r".escape_unicode(),
2051 String::from_str("\\u{1d4ea}\\u{d}"));
2055 fn test_escape_default() {
2056 assert_eq!("abc".escape_default(), String::from_str("abc"));
2057 assert_eq!("a c".escape_default(), String::from_str("a c"));
2058 assert_eq!("\r\n\t".escape_default(), String::from_str("\\r\\n\\t"));
2059 assert_eq!("'\"\\".escape_default(), String::from_str("\\'\\\"\\\\"));
2060 assert_eq!("\u{100}\u{ffff}".escape_default(),
2061 String::from_str("\\u{100}\\u{ffff}"));
2062 assert_eq!("\u{10000}\u{10ffff}".escape_default(),
2063 String::from_str("\\u{10000}\\u{10ffff}"));
2064 assert_eq!("ab\u{fb00}".escape_default(),
2065 String::from_str("ab\\u{fb00}"));
2066 assert_eq!("\u{1d4ea}\r".escape_default(),
2067 String::from_str("\\u{1d4ea}\\r"));
fn test_total_ord() {
    // Each comparison is asserted; a bare `a.cmp(b) == X;` statement would
    // compute the answer and throw it away, so the test could never fail.
    // A string that extends its prefix sorts after it...
    assert_eq!("1234".cmp("123"), Greater);
    assert_eq!("123".cmp("1234"), Less);
    assert_eq!("1234".cmp("1234"), Equal);
    // ...but the first differing character decides, regardless of length.
    assert_eq!("12345555".cmp("123456"), Less);
    assert_eq!("22".cmp("1234"), Greater);
}
2080 fn test_char_range_at() {
2081 let data = "b¢€𤭢𤭢€¢b";
2082 assert_eq!('b', data.char_range_at(0).ch);
2083 assert_eq!('¢', data.char_range_at(1).ch);
2084 assert_eq!('€', data.char_range_at(3).ch);
2085 assert_eq!('𤭢', data.char_range_at(6).ch);
2086 assert_eq!('𤭢', data.char_range_at(10).ch);
2087 assert_eq!('€', data.char_range_at(14).ch);
2088 assert_eq!('¢', data.char_range_at(17).ch);
2089 assert_eq!('b', data.char_range_at(19).ch);
2093 fn test_char_range_at_reverse_underflow() {
2094 assert_eq!("abc".char_range_at_reverse(0).next, 0);
2098 fn test_iterator() {
2099 let s = "ศไทย中华Việt Nam";
2100 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2103 let mut it = s.chars();
2106 assert_eq!(c, v[pos]);
2109 assert_eq!(pos, v.len());
2113 fn test_rev_iterator() {
2114 let s = "ศไทย中华Việt Nam";
2115 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2118 let mut it = s.chars().rev();
2121 assert_eq!(c, v[pos]);
2124 assert_eq!(pos, v.len());
2128 fn test_chars_decoding() {
2129 let mut bytes = [0u8; 4];
2130 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2131 let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2132 let s = ::core::str::from_utf8(bytes[..len]).unwrap();
2133 if Some(c) != s.chars().next() {
2134 panic!("character {:x}={} does not decode correctly", c as u32, c);
2140 fn test_chars_rev_decoding() {
2141 let mut bytes = [0u8; 4];
2142 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2143 let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2144 let s = ::core::str::from_utf8(bytes[..len]).unwrap();
2145 if Some(c) != s.chars().rev().next() {
2146 panic!("character {:x}={} does not decode correctly", c as u32, c);
2152 fn test_iterator_clone() {
2153 let s = "ศไทย中华Việt Nam";
2154 let mut it = s.chars();
2156 assert!(it.zip(it.clone()).all(|(x,y)| x == y));
2160 fn test_bytesator() {
2161 let s = "ศไทย中华Việt Nam";
2163 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2164 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2169 for b in s.bytes() {
2170 assert_eq!(b, v[pos]);
2176 fn test_bytes_revator() {
2177 let s = "ศไทย中华Việt Nam";
2179 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2180 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2183 let mut pos = v.len();
2185 for b in s.bytes().rev() {
2187 assert_eq!(b, v[pos]);
2192 fn test_char_indicesator() {
2193 let s = "ศไทย中华Việt Nam";
2194 let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
2195 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2198 let mut it = s.char_indices();
2201 assert_eq!(c, (p[pos], v[pos]));
2204 assert_eq!(pos, v.len());
2205 assert_eq!(pos, p.len());
2209 fn test_char_indices_revator() {
2210 let s = "ศไทย中华Việt Nam";
2211 let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
2212 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2215 let mut it = s.char_indices().rev();
2218 assert_eq!(c, (p[pos], v[pos]));
2221 assert_eq!(pos, v.len());
2222 assert_eq!(pos, p.len());
2226 fn test_splitn_char_iterator() {
2227 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2229 let split: Vec<&str> = data.splitn(3, ' ').collect();
2230 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2232 let split: Vec<&str> = data.splitn(3, |&: c: char| c == ' ').collect();
2233 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2236 let split: Vec<&str> = data.splitn(3, 'ä').collect();
2237 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
2239 let split: Vec<&str> = data.splitn(3, |&: c: char| c == 'ä').collect();
2240 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
2244 fn test_split_char_iterator_no_trailing() {
2245 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2247 let split: Vec<&str> = data.split('\n').collect();
2248 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
2250 let split: Vec<&str> = data.split_terminator('\n').collect();
2251 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
2256 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
2257 let words: Vec<&str> = data.words().collect();
2258 assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
2262 fn test_nfd_chars() {
2264 ($input: expr, $expected: expr) => {
2265 assert_eq!($input.nfd_chars().collect::<String>(), $expected);
2269 t!("\u{1e0b}\u{1c4}", "d\u{307}\u{1c4}");
2270 t!("\u{2026}", "\u{2026}");
2271 t!("\u{2126}", "\u{3a9}");
2272 t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
2273 t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
2274 t!("a\u{301}", "a\u{301}");
2275 t!("\u{301}a", "\u{301}a");
2276 t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
2277 t!("\u{ac1c}", "\u{1100}\u{1162}");
2281 fn test_nfkd_chars() {
2283 ($input: expr, $expected: expr) => {
2284 assert_eq!($input.nfkd_chars().collect::<String>(), $expected);
2288 t!("\u{1e0b}\u{1c4}", "d\u{307}DZ\u{30c}");
2289 t!("\u{2026}", "...");
2290 t!("\u{2126}", "\u{3a9}");
2291 t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
2292 t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
2293 t!("a\u{301}", "a\u{301}");
2294 t!("\u{301}a", "\u{301}a");
2295 t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
2296 t!("\u{ac1c}", "\u{1100}\u{1162}");
2300 fn test_nfc_chars() {
2302 ($input: expr, $expected: expr) => {
2303 assert_eq!($input.nfc_chars().collect::<String>(), $expected);
2307 t!("\u{1e0b}\u{1c4}", "\u{1e0b}\u{1c4}");
2308 t!("\u{2026}", "\u{2026}");
2309 t!("\u{2126}", "\u{3a9}");
2310 t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
2311 t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
2312 t!("a\u{301}", "\u{e1}");
2313 t!("\u{301}a", "\u{301}a");
2314 t!("\u{d4db}", "\u{d4db}");
2315 t!("\u{ac1c}", "\u{ac1c}");
2316 t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
2320 fn test_nfkc_chars() {
2322 ($input: expr, $expected: expr) => {
2323 assert_eq!($input.nfkc_chars().collect::<String>(), $expected);
2327 t!("\u{1e0b}\u{1c4}", "\u{1e0b}D\u{17d}");
2328 t!("\u{2026}", "...");
2329 t!("\u{2126}", "\u{3a9}");
2330 t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
2331 t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
2332 t!("a\u{301}", "\u{e1}");
2333 t!("\u{301}a", "\u{301}a");
2334 t!("\u{d4db}", "\u{d4db}");
2335 t!("\u{ac1c}", "\u{ac1c}");
2336 t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
2341 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
2342 let lines: Vec<&str> = data.lines().collect();
2343 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2345 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
2346 let lines: Vec<&str> = data.lines().collect();
2347 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2351 fn test_graphemes() {
2352 use core::iter::order;
2353 // official Unicode test data
2354 // from http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
2355 let test_same: [(_, &[_]); 325] = [
2356 ("\u{20}\u{20}", &["\u{20}", "\u{20}"]),
2357 ("\u{20}\u{308}\u{20}", &["\u{20}\u{308}", "\u{20}"]),
2358 ("\u{20}\u{D}", &["\u{20}", "\u{D}"]),
2359 ("\u{20}\u{308}\u{D}", &["\u{20}\u{308}", "\u{D}"]),
2360 ("\u{20}\u{A}", &["\u{20}", "\u{A}"]),
2361 ("\u{20}\u{308}\u{A}", &["\u{20}\u{308}", "\u{A}"]),
2362 ("\u{20}\u{1}", &["\u{20}", "\u{1}"]),
2363 ("\u{20}\u{308}\u{1}", &["\u{20}\u{308}", "\u{1}"]),
2364 ("\u{20}\u{300}", &["\u{20}\u{300}"]),
2365 ("\u{20}\u{308}\u{300}", &["\u{20}\u{308}\u{300}"]),
2366 ("\u{20}\u{1100}", &["\u{20}", "\u{1100}"]),
2367 ("\u{20}\u{308}\u{1100}", &["\u{20}\u{308}", "\u{1100}"]),
2368 ("\u{20}\u{1160}", &["\u{20}", "\u{1160}"]),
2369 ("\u{20}\u{308}\u{1160}", &["\u{20}\u{308}", "\u{1160}"]),
2370 ("\u{20}\u{11A8}", &["\u{20}", "\u{11A8}"]),
2371 ("\u{20}\u{308}\u{11A8}", &["\u{20}\u{308}", "\u{11A8}"]),
2372 ("\u{20}\u{AC00}", &["\u{20}", "\u{AC00}"]),
2373 ("\u{20}\u{308}\u{AC00}", &["\u{20}\u{308}", "\u{AC00}"]),
2374 ("\u{20}\u{AC01}", &["\u{20}", "\u{AC01}"]),
2375 ("\u{20}\u{308}\u{AC01}", &["\u{20}\u{308}", "\u{AC01}"]),
2376 ("\u{20}\u{1F1E6}", &["\u{20}", "\u{1F1E6}"]),
2377 ("\u{20}\u{308}\u{1F1E6}", &["\u{20}\u{308}", "\u{1F1E6}"]),
2378 ("\u{20}\u{378}", &["\u{20}", "\u{378}"]),
2379 ("\u{20}\u{308}\u{378}", &["\u{20}\u{308}", "\u{378}"]),
2380 ("\u{D}\u{20}", &["\u{D}", "\u{20}"]),
2381 ("\u{D}\u{308}\u{20}", &["\u{D}", "\u{308}", "\u{20}"]),
2382 ("\u{D}\u{D}", &["\u{D}", "\u{D}"]),
2383 ("\u{D}\u{308}\u{D}", &["\u{D}", "\u{308}", "\u{D}"]),
2384 ("\u{D}\u{A}", &["\u{D}\u{A}"]),
2385 ("\u{D}\u{308}\u{A}", &["\u{D}", "\u{308}", "\u{A}"]),
2386 ("\u{D}\u{1}", &["\u{D}", "\u{1}"]),
2387 ("\u{D}\u{308}\u{1}", &["\u{D}", "\u{308}", "\u{1}"]),
2388 ("\u{D}\u{300}", &["\u{D}", "\u{300}"]),
2389 ("\u{D}\u{308}\u{300}", &["\u{D}", "\u{308}\u{300}"]),
2390 ("\u{D}\u{903}", &["\u{D}", "\u{903}"]),
2391 ("\u{D}\u{1100}", &["\u{D}", "\u{1100}"]),
2392 ("\u{D}\u{308}\u{1100}", &["\u{D}", "\u{308}", "\u{1100}"]),
2393 ("\u{D}\u{1160}", &["\u{D}", "\u{1160}"]),
2394 ("\u{D}\u{308}\u{1160}", &["\u{D}", "\u{308}", "\u{1160}"]),
2395 ("\u{D}\u{11A8}", &["\u{D}", "\u{11A8}"]),
2396 ("\u{D}\u{308}\u{11A8}", &["\u{D}", "\u{308}", "\u{11A8}"]),
2397 ("\u{D}\u{AC00}", &["\u{D}", "\u{AC00}"]),
2398 ("\u{D}\u{308}\u{AC00}", &["\u{D}", "\u{308}", "\u{AC00}"]),
2399 ("\u{D}\u{AC01}", &["\u{D}", "\u{AC01}"]),
2400 ("\u{D}\u{308}\u{AC01}", &["\u{D}", "\u{308}", "\u{AC01}"]),
2401 ("\u{D}\u{1F1E6}", &["\u{D}", "\u{1F1E6}"]),
2402 ("\u{D}\u{308}\u{1F1E6}", &["\u{D}", "\u{308}", "\u{1F1E6}"]),
2403 ("\u{D}\u{378}", &["\u{D}", "\u{378}"]),
2404 ("\u{D}\u{308}\u{378}", &["\u{D}", "\u{308}", "\u{378}"]),
2405 ("\u{A}\u{20}", &["\u{A}", "\u{20}"]),
2406 ("\u{A}\u{308}\u{20}", &["\u{A}", "\u{308}", "\u{20}"]),
2407 ("\u{A}\u{D}", &["\u{A}", "\u{D}"]),
2408 ("\u{A}\u{308}\u{D}", &["\u{A}", "\u{308}", "\u{D}"]),
2409 ("\u{A}\u{A}", &["\u{A}", "\u{A}"]),
2410 ("\u{A}\u{308}\u{A}", &["\u{A}", "\u{308}", "\u{A}"]),
2411 ("\u{A}\u{1}", &["\u{A}", "\u{1}"]),
2412 ("\u{A}\u{308}\u{1}", &["\u{A}", "\u{308}", "\u{1}"]),
2413 ("\u{A}\u{300}", &["\u{A}", "\u{300}"]),
2414 ("\u{A}\u{308}\u{300}", &["\u{A}", "\u{308}\u{300}"]),
2415 ("\u{A}\u{903}", &["\u{A}", "\u{903}"]),
2416 ("\u{A}\u{1100}", &["\u{A}", "\u{1100}"]),
2417 ("\u{A}\u{308}\u{1100}", &["\u{A}", "\u{308}", "\u{1100}"]),
2418 ("\u{A}\u{1160}", &["\u{A}", "\u{1160}"]),
2419 ("\u{A}\u{308}\u{1160}", &["\u{A}", "\u{308}", "\u{1160}"]),
2420 ("\u{A}\u{11A8}", &["\u{A}", "\u{11A8}"]),
2421 ("\u{A}\u{308}\u{11A8}", &["\u{A}", "\u{308}", "\u{11A8}"]),
2422 ("\u{A}\u{AC00}", &["\u{A}", "\u{AC00}"]),
2423 ("\u{A}\u{308}\u{AC00}", &["\u{A}", "\u{308}", "\u{AC00}"]),
2424 ("\u{A}\u{AC01}", &["\u{A}", "\u{AC01}"]),
2425 ("\u{A}\u{308}\u{AC01}", &["\u{A}", "\u{308}", "\u{AC01}"]),
2426 ("\u{A}\u{1F1E6}", &["\u{A}", "\u{1F1E6}"]),
2427 ("\u{A}\u{308}\u{1F1E6}", &["\u{A}", "\u{308}", "\u{1F1E6}"]),
2428 ("\u{A}\u{378}", &["\u{A}", "\u{378}"]),
2429 ("\u{A}\u{308}\u{378}", &["\u{A}", "\u{308}", "\u{378}"]),
2430 ("\u{1}\u{20}", &["\u{1}", "\u{20}"]),
2431 ("\u{1}\u{308}\u{20}", &["\u{1}", "\u{308}", "\u{20}"]),
2432 ("\u{1}\u{D}", &["\u{1}", "\u{D}"]),
2433 ("\u{1}\u{308}\u{D}", &["\u{1}", "\u{308}", "\u{D}"]),
2434 ("\u{1}\u{A}", &["\u{1}", "\u{A}"]),
2435 ("\u{1}\u{308}\u{A}", &["\u{1}", "\u{308}", "\u{A}"]),
2436 ("\u{1}\u{1}", &["\u{1}", "\u{1}"]),
2437 ("\u{1}\u{308}\u{1}", &["\u{1}", "\u{308}", "\u{1}"]),
2438 ("\u{1}\u{300}", &["\u{1}", "\u{300}"]),
2439 ("\u{1}\u{308}\u{300}", &["\u{1}", "\u{308}\u{300}"]),
2440 ("\u{1}\u{903}", &["\u{1}", "\u{903}"]),
2441 ("\u{1}\u{1100}", &["\u{1}", "\u{1100}"]),
2442 ("\u{1}\u{308}\u{1100}", &["\u{1}", "\u{308}", "\u{1100}"]),
2443 ("\u{1}\u{1160}", &["\u{1}", "\u{1160}"]),
2444 ("\u{1}\u{308}\u{1160}", &["\u{1}", "\u{308}", "\u{1160}"]),
2445 ("\u{1}\u{11A8}", &["\u{1}", "\u{11A8}"]),
2446 ("\u{1}\u{308}\u{11A8}", &["\u{1}", "\u{308}", "\u{11A8}"]),
2447 ("\u{1}\u{AC00}", &["\u{1}", "\u{AC00}"]),
2448 ("\u{1}\u{308}\u{AC00}", &["\u{1}", "\u{308}", "\u{AC00}"]),
2449 ("\u{1}\u{AC01}", &["\u{1}", "\u{AC01}"]),
2450 ("\u{1}\u{308}\u{AC01}", &["\u{1}", "\u{308}", "\u{AC01}"]),
2451 ("\u{1}\u{1F1E6}", &["\u{1}", "\u{1F1E6}"]),
2452 ("\u{1}\u{308}\u{1F1E6}", &["\u{1}", "\u{308}", "\u{1F1E6}"]),
2453 ("\u{1}\u{378}", &["\u{1}", "\u{378}"]),
2454 ("\u{1}\u{308}\u{378}", &["\u{1}", "\u{308}", "\u{378}"]),
2455 ("\u{300}\u{20}", &["\u{300}", "\u{20}"]),
2456 ("\u{300}\u{308}\u{20}", &["\u{300}\u{308}", "\u{20}"]),
2457 ("\u{300}\u{D}", &["\u{300}", "\u{D}"]),
2458 ("\u{300}\u{308}\u{D}", &["\u{300}\u{308}", "\u{D}"]),
2459 ("\u{300}\u{A}", &["\u{300}", "\u{A}"]),
2460 ("\u{300}\u{308}\u{A}", &["\u{300}\u{308}", "\u{A}"]),
2461 ("\u{300}\u{1}", &["\u{300}", "\u{1}"]),
2462 ("\u{300}\u{308}\u{1}", &["\u{300}\u{308}", "\u{1}"]),
2463 ("\u{300}\u{300}", &["\u{300}\u{300}"]),
2464 ("\u{300}\u{308}\u{300}", &["\u{300}\u{308}\u{300}"]),
2465 ("\u{300}\u{1100}", &["\u{300}", "\u{1100}"]),
2466 ("\u{300}\u{308}\u{1100}", &["\u{300}\u{308}", "\u{1100}"]),
2467 ("\u{300}\u{1160}", &["\u{300}", "\u{1160}"]),
2468 ("\u{300}\u{308}\u{1160}", &["\u{300}\u{308}", "\u{1160}"]),
2469 ("\u{300}\u{11A8}", &["\u{300}", "\u{11A8}"]),
2470 ("\u{300}\u{308}\u{11A8}", &["\u{300}\u{308}", "\u{11A8}"]),
2471 ("\u{300}\u{AC00}", &["\u{300}", "\u{AC00}"]),
2472 ("\u{300}\u{308}\u{AC00}", &["\u{300}\u{308}", "\u{AC00}"]),
2473 ("\u{300}\u{AC01}", &["\u{300}", "\u{AC01}"]),
2474 ("\u{300}\u{308}\u{AC01}", &["\u{300}\u{308}", "\u{AC01}"]),
2475 ("\u{300}\u{1F1E6}", &["\u{300}", "\u{1F1E6}"]),
2476 ("\u{300}\u{308}\u{1F1E6}", &["\u{300}\u{308}", "\u{1F1E6}"]),
2477 ("\u{300}\u{378}", &["\u{300}", "\u{378}"]),
2478 ("\u{300}\u{308}\u{378}", &["\u{300}\u{308}", "\u{378}"]),
2479 ("\u{903}\u{20}", &["\u{903}", "\u{20}"]),
2480 ("\u{903}\u{308}\u{20}", &["\u{903}\u{308}", "\u{20}"]),
2481 ("\u{903}\u{D}", &["\u{903}", "\u{D}"]),
2482 ("\u{903}\u{308}\u{D}", &["\u{903}\u{308}", "\u{D}"]),
2483 ("\u{903}\u{A}", &["\u{903}", "\u{A}"]),
2484 ("\u{903}\u{308}\u{A}", &["\u{903}\u{308}", "\u{A}"]),
2485 ("\u{903}\u{1}", &["\u{903}", "\u{1}"]),
2486 ("\u{903}\u{308}\u{1}", &["\u{903}\u{308}", "\u{1}"]),
2487 ("\u{903}\u{300}", &["\u{903}\u{300}"]),
2488 ("\u{903}\u{308}\u{300}", &["\u{903}\u{308}\u{300}"]),
2489 ("\u{903}\u{1100}", &["\u{903}", "\u{1100}"]),
2490 ("\u{903}\u{308}\u{1100}", &["\u{903}\u{308}", "\u{1100}"]),
2491 ("\u{903}\u{1160}", &["\u{903}", "\u{1160}"]),
2492 ("\u{903}\u{308}\u{1160}", &["\u{903}\u{308}", "\u{1160}"]),
2493 ("\u{903}\u{11A8}", &["\u{903}", "\u{11A8}"]),
2494 ("\u{903}\u{308}\u{11A8}", &["\u{903}\u{308}", "\u{11A8}"]),
2495 ("\u{903}\u{AC00}", &["\u{903}", "\u{AC00}"]),
2496 ("\u{903}\u{308}\u{AC00}", &["\u{903}\u{308}", "\u{AC00}"]),
2497 ("\u{903}\u{AC01}", &["\u{903}", "\u{AC01}"]),
2498 ("\u{903}\u{308}\u{AC01}", &["\u{903}\u{308}", "\u{AC01}"]),
2499 ("\u{903}\u{1F1E6}", &["\u{903}", "\u{1F1E6}"]),
2500 ("\u{903}\u{308}\u{1F1E6}", &["\u{903}\u{308}", "\u{1F1E6}"]),
2501 ("\u{903}\u{378}", &["\u{903}", "\u{378}"]),
2502 ("\u{903}\u{308}\u{378}", &["\u{903}\u{308}", "\u{378}"]),
2503 ("\u{1100}\u{20}", &["\u{1100}", "\u{20}"]),
2504 ("\u{1100}\u{308}\u{20}", &["\u{1100}\u{308}", "\u{20}"]),
2505 ("\u{1100}\u{D}", &["\u{1100}", "\u{D}"]),
2506 ("\u{1100}\u{308}\u{D}", &["\u{1100}\u{308}", "\u{D}"]),
2507 ("\u{1100}\u{A}", &["\u{1100}", "\u{A}"]),
2508 ("\u{1100}\u{308}\u{A}", &["\u{1100}\u{308}", "\u{A}"]),
2509 ("\u{1100}\u{1}", &["\u{1100}", "\u{1}"]),
2510 ("\u{1100}\u{308}\u{1}", &["\u{1100}\u{308}", "\u{1}"]),
2511 ("\u{1100}\u{300}", &["\u{1100}\u{300}"]),
2512 ("\u{1100}\u{308}\u{300}", &["\u{1100}\u{308}\u{300}"]),
2513 ("\u{1100}\u{1100}", &["\u{1100}\u{1100}"]),
2514 ("\u{1100}\u{308}\u{1100}", &["\u{1100}\u{308}", "\u{1100}"]),
2515 ("\u{1100}\u{1160}", &["\u{1100}\u{1160}"]),
2516 ("\u{1100}\u{308}\u{1160}", &["\u{1100}\u{308}", "\u{1160}"]),
2517 ("\u{1100}\u{11A8}", &["\u{1100}", "\u{11A8}"]),
2518 ("\u{1100}\u{308}\u{11A8}", &["\u{1100}\u{308}", "\u{11A8}"]),
2519 ("\u{1100}\u{AC00}", &["\u{1100}\u{AC00}"]),
2520 ("\u{1100}\u{308}\u{AC00}", &["\u{1100}\u{308}", "\u{AC00}"]),
2521 ("\u{1100}\u{AC01}", &["\u{1100}\u{AC01}"]),
2522 ("\u{1100}\u{308}\u{AC01}", &["\u{1100}\u{308}", "\u{AC01}"]),
2523 ("\u{1100}\u{1F1E6}", &["\u{1100}", "\u{1F1E6}"]),
2524 ("\u{1100}\u{308}\u{1F1E6}", &["\u{1100}\u{308}", "\u{1F1E6}"]),
2525 ("\u{1100}\u{378}", &["\u{1100}", "\u{378}"]),
2526 ("\u{1100}\u{308}\u{378}", &["\u{1100}\u{308}", "\u{378}"]),
2527 ("\u{1160}\u{20}", &["\u{1160}", "\u{20}"]),
2528 ("\u{1160}\u{308}\u{20}", &["\u{1160}\u{308}", "\u{20}"]),
2529 ("\u{1160}\u{D}", &["\u{1160}", "\u{D}"]),
2530 ("\u{1160}\u{308}\u{D}", &["\u{1160}\u{308}", "\u{D}"]),
2531 ("\u{1160}\u{A}", &["\u{1160}", "\u{A}"]),
2532 ("\u{1160}\u{308}\u{A}", &["\u{1160}\u{308}", "\u{A}"]),
2533 ("\u{1160}\u{1}", &["\u{1160}", "\u{1}"]),
2534 ("\u{1160}\u{308}\u{1}", &["\u{1160}\u{308}", "\u{1}"]),
2535 ("\u{1160}\u{300}", &["\u{1160}\u{300}"]),
2536 ("\u{1160}\u{308}\u{300}", &["\u{1160}\u{308}\u{300}"]),
2537 ("\u{1160}\u{1100}", &["\u{1160}", "\u{1100}"]),
2538 ("\u{1160}\u{308}\u{1100}", &["\u{1160}\u{308}", "\u{1100}"]),
2539 ("\u{1160}\u{1160}", &["\u{1160}\u{1160}"]),
2540 ("\u{1160}\u{308}\u{1160}", &["\u{1160}\u{308}", "\u{1160}"]),
2541 ("\u{1160}\u{11A8}", &["\u{1160}\u{11A8}"]),
2542 ("\u{1160}\u{308}\u{11A8}", &["\u{1160}\u{308}", "\u{11A8}"]),
2543 ("\u{1160}\u{AC00}", &["\u{1160}", "\u{AC00}"]),
2544 ("\u{1160}\u{308}\u{AC00}", &["\u{1160}\u{308}", "\u{AC00}"]),
2545 ("\u{1160}\u{AC01}", &["\u{1160}", "\u{AC01}"]),
2546 ("\u{1160}\u{308}\u{AC01}", &["\u{1160}\u{308}", "\u{AC01}"]),
2547 ("\u{1160}\u{1F1E6}", &["\u{1160}", "\u{1F1E6}"]),
2548 ("\u{1160}\u{308}\u{1F1E6}", &["\u{1160}\u{308}", "\u{1F1E6}"]),
2549 ("\u{1160}\u{378}", &["\u{1160}", "\u{378}"]),
2550 ("\u{1160}\u{308}\u{378}", &["\u{1160}\u{308}", "\u{378}"]),
2551 ("\u{11A8}\u{20}", &["\u{11A8}", "\u{20}"]),
2552 ("\u{11A8}\u{308}\u{20}", &["\u{11A8}\u{308}", "\u{20}"]),
2553 ("\u{11A8}\u{D}", &["\u{11A8}", "\u{D}"]),
2554 ("\u{11A8}\u{308}\u{D}", &["\u{11A8}\u{308}", "\u{D}"]),
2555 ("\u{11A8}\u{A}", &["\u{11A8}", "\u{A}"]),
2556 ("\u{11A8}\u{308}\u{A}", &["\u{11A8}\u{308}", "\u{A}"]),
2557 ("\u{11A8}\u{1}", &["\u{11A8}", "\u{1}"]),
2558 ("\u{11A8}\u{308}\u{1}", &["\u{11A8}\u{308}", "\u{1}"]),
2559 ("\u{11A8}\u{300}", &["\u{11A8}\u{300}"]),
2560 ("\u{11A8}\u{308}\u{300}", &["\u{11A8}\u{308}\u{300}"]),
2561 ("\u{11A8}\u{1100}", &["\u{11A8}", "\u{1100}"]),
2562 ("\u{11A8}\u{308}\u{1100}", &["\u{11A8}\u{308}", "\u{1100}"]),
2563 ("\u{11A8}\u{1160}", &["\u{11A8}", "\u{1160}"]),
2564 ("\u{11A8}\u{308}\u{1160}", &["\u{11A8}\u{308}", "\u{1160}"]),
2565 ("\u{11A8}\u{11A8}", &["\u{11A8}\u{11A8}"]),
2566 ("\u{11A8}\u{308}\u{11A8}", &["\u{11A8}\u{308}", "\u{11A8}"]),
2567 ("\u{11A8}\u{AC00}", &["\u{11A8}", "\u{AC00}"]),
2568 ("\u{11A8}\u{308}\u{AC00}", &["\u{11A8}\u{308}", "\u{AC00}"]),
2569 ("\u{11A8}\u{AC01}", &["\u{11A8}", "\u{AC01}"]),
2570 ("\u{11A8}\u{308}\u{AC01}", &["\u{11A8}\u{308}", "\u{AC01}"]),
2571 ("\u{11A8}\u{1F1E6}", &["\u{11A8}", "\u{1F1E6}"]),
2572 ("\u{11A8}\u{308}\u{1F1E6}", &["\u{11A8}\u{308}", "\u{1F1E6}"]),
2573 ("\u{11A8}\u{378}", &["\u{11A8}", "\u{378}"]),
2574 ("\u{11A8}\u{308}\u{378}", &["\u{11A8}\u{308}", "\u{378}"]),
2575 ("\u{AC00}\u{20}", &["\u{AC00}", "\u{20}"]),
2576 ("\u{AC00}\u{308}\u{20}", &["\u{AC00}\u{308}", "\u{20}"]),
2577 ("\u{AC00}\u{D}", &["\u{AC00}", "\u{D}"]),
2578 ("\u{AC00}\u{308}\u{D}", &["\u{AC00}\u{308}", "\u{D}"]),
2579 ("\u{AC00}\u{A}", &["\u{AC00}", "\u{A}"]),
2580 ("\u{AC00}\u{308}\u{A}", &["\u{AC00}\u{308}", "\u{A}"]),
2581 ("\u{AC00}\u{1}", &["\u{AC00}", "\u{1}"]),
2582 ("\u{AC00}\u{308}\u{1}", &["\u{AC00}\u{308}", "\u{1}"]),
2583 ("\u{AC00}\u{300}", &["\u{AC00}\u{300}"]),
2584 ("\u{AC00}\u{308}\u{300}", &["\u{AC00}\u{308}\u{300}"]),
2585 ("\u{AC00}\u{1100}", &["\u{AC00}", "\u{1100}"]),
2586 ("\u{AC00}\u{308}\u{1100}", &["\u{AC00}\u{308}", "\u{1100}"]),
2587 ("\u{AC00}\u{1160}", &["\u{AC00}\u{1160}"]),
2588 ("\u{AC00}\u{308}\u{1160}", &["\u{AC00}\u{308}", "\u{1160}"]),
2589 ("\u{AC00}\u{11A8}", &["\u{AC00}\u{11A8}"]),
2590 ("\u{AC00}\u{308}\u{11A8}", &["\u{AC00}\u{308}", "\u{11A8}"]),
2591 ("\u{AC00}\u{AC00}", &["\u{AC00}", "\u{AC00}"]),
2592 ("\u{AC00}\u{308}\u{AC00}", &["\u{AC00}\u{308}", "\u{AC00}"]),
2593 ("\u{AC00}\u{AC01}", &["\u{AC00}", "\u{AC01}"]),
2594 ("\u{AC00}\u{308}\u{AC01}", &["\u{AC00}\u{308}", "\u{AC01}"]),
2595 ("\u{AC00}\u{1F1E6}", &["\u{AC00}", "\u{1F1E6}"]),
2596 ("\u{AC00}\u{308}\u{1F1E6}", &["\u{AC00}\u{308}", "\u{1F1E6}"]),
2597 ("\u{AC00}\u{378}", &["\u{AC00}", "\u{378}"]),
2598 ("\u{AC00}\u{308}\u{378}", &["\u{AC00}\u{308}", "\u{378}"]),
2599 ("\u{AC01}\u{20}", &["\u{AC01}", "\u{20}"]),
2600 ("\u{AC01}\u{308}\u{20}", &["\u{AC01}\u{308}", "\u{20}"]),
2601 ("\u{AC01}\u{D}", &["\u{AC01}", "\u{D}"]),
2602 ("\u{AC01}\u{308}\u{D}", &["\u{AC01}\u{308}", "\u{D}"]),
2603 ("\u{AC01}\u{A}", &["\u{AC01}", "\u{A}"]),
2604 ("\u{AC01}\u{308}\u{A}", &["\u{AC01}\u{308}", "\u{A}"]),
2605 ("\u{AC01}\u{1}", &["\u{AC01}", "\u{1}"]),
2606 ("\u{AC01}\u{308}\u{1}", &["\u{AC01}\u{308}", "\u{1}"]),
2607 ("\u{AC01}\u{300}", &["\u{AC01}\u{300}"]),
2608 ("\u{AC01}\u{308}\u{300}", &["\u{AC01}\u{308}\u{300}"]),
2609 ("\u{AC01}\u{1100}", &["\u{AC01}", "\u{1100}"]),
2610 ("\u{AC01}\u{308}\u{1100}", &["\u{AC01}\u{308}", "\u{1100}"]),
2611 ("\u{AC01}\u{1160}", &["\u{AC01}", "\u{1160}"]),
2612 ("\u{AC01}\u{308}\u{1160}", &["\u{AC01}\u{308}", "\u{1160}"]),
2613 ("\u{AC01}\u{11A8}", &["\u{AC01}\u{11A8}"]),
2614 ("\u{AC01}\u{308}\u{11A8}", &["\u{AC01}\u{308}", "\u{11A8}"]),
2615 ("\u{AC01}\u{AC00}", &["\u{AC01}", "\u{AC00}"]),
2616 ("\u{AC01}\u{308}\u{AC00}", &["\u{AC01}\u{308}", "\u{AC00}"]),
2617 ("\u{AC01}\u{AC01}", &["\u{AC01}", "\u{AC01}"]),
2618 ("\u{AC01}\u{308}\u{AC01}", &["\u{AC01}\u{308}", "\u{AC01}"]),
2619 ("\u{AC01}\u{1F1E6}", &["\u{AC01}", "\u{1F1E6}"]),
2620 ("\u{AC01}\u{308}\u{1F1E6}", &["\u{AC01}\u{308}", "\u{1F1E6}"]),
2621 ("\u{AC01}\u{378}", &["\u{AC01}", "\u{378}"]),
2622 ("\u{AC01}\u{308}\u{378}", &["\u{AC01}\u{308}", "\u{378}"]),
2623 ("\u{1F1E6}\u{20}", &["\u{1F1E6}", "\u{20}"]),
2624 ("\u{1F1E6}\u{308}\u{20}", &["\u{1F1E6}\u{308}", "\u{20}"]),
2625 ("\u{1F1E6}\u{D}", &["\u{1F1E6}", "\u{D}"]),
2626 ("\u{1F1E6}\u{308}\u{D}", &["\u{1F1E6}\u{308}", "\u{D}"]),
2627 ("\u{1F1E6}\u{A}", &["\u{1F1E6}", "\u{A}"]),
2628 ("\u{1F1E6}\u{308}\u{A}", &["\u{1F1E6}\u{308}", "\u{A}"]),
2629 ("\u{1F1E6}\u{1}", &["\u{1F1E6}", "\u{1}"]),
2630 ("\u{1F1E6}\u{308}\u{1}", &["\u{1F1E6}\u{308}", "\u{1}"]),
2631 ("\u{1F1E6}\u{300}", &["\u{1F1E6}\u{300}"]),
2632 ("\u{1F1E6}\u{308}\u{300}", &["\u{1F1E6}\u{308}\u{300}"]),
2633 ("\u{1F1E6}\u{1100}", &["\u{1F1E6}", "\u{1100}"]),
2634 ("\u{1F1E6}\u{308}\u{1100}", &["\u{1F1E6}\u{308}", "\u{1100}"]),
2635 ("\u{1F1E6}\u{1160}", &["\u{1F1E6}", "\u{1160}"]),
2636 ("\u{1F1E6}\u{308}\u{1160}", &["\u{1F1E6}\u{308}", "\u{1160}"]),
2637 ("\u{1F1E6}\u{11A8}", &["\u{1F1E6}", "\u{11A8}"]),
2638 ("\u{1F1E6}\u{308}\u{11A8}", &["\u{1F1E6}\u{308}", "\u{11A8}"]),
2639 ("\u{1F1E6}\u{AC00}", &["\u{1F1E6}", "\u{AC00}"]),
2640 ("\u{1F1E6}\u{308}\u{AC00}", &["\u{1F1E6}\u{308}", "\u{AC00}"]),
2641 ("\u{1F1E6}\u{AC01}", &["\u{1F1E6}", "\u{AC01}"]),
2642 ("\u{1F1E6}\u{308}\u{AC01}", &["\u{1F1E6}\u{308}", "\u{AC01}"]),
2643 ("\u{1F1E6}\u{1F1E6}", &["\u{1F1E6}\u{1F1E6}"]),
2644 ("\u{1F1E6}\u{308}\u{1F1E6}", &["\u{1F1E6}\u{308}", "\u{1F1E6}"]),
2645 ("\u{1F1E6}\u{378}", &["\u{1F1E6}", "\u{378}"]),
2646 ("\u{1F1E6}\u{308}\u{378}", &["\u{1F1E6}\u{308}", "\u{378}"]),
2647 ("\u{378}\u{20}", &["\u{378}", "\u{20}"]),
2648 ("\u{378}\u{308}\u{20}", &["\u{378}\u{308}", "\u{20}"]),
2649 ("\u{378}\u{D}", &["\u{378}", "\u{D}"]),
2650 ("\u{378}\u{308}\u{D}", &["\u{378}\u{308}", "\u{D}"]),
2651 ("\u{378}\u{A}", &["\u{378}", "\u{A}"]),
2652 ("\u{378}\u{308}\u{A}", &["\u{378}\u{308}", "\u{A}"]),
2653 ("\u{378}\u{1}", &["\u{378}", "\u{1}"]),
2654 ("\u{378}\u{308}\u{1}", &["\u{378}\u{308}", "\u{1}"]),
2655 ("\u{378}\u{300}", &["\u{378}\u{300}"]),
2656 ("\u{378}\u{308}\u{300}", &["\u{378}\u{308}\u{300}"]),
2657 ("\u{378}\u{1100}", &["\u{378}", "\u{1100}"]),
2658 ("\u{378}\u{308}\u{1100}", &["\u{378}\u{308}", "\u{1100}"]),
2659 ("\u{378}\u{1160}", &["\u{378}", "\u{1160}"]),
2660 ("\u{378}\u{308}\u{1160}", &["\u{378}\u{308}", "\u{1160}"]),
2661 ("\u{378}\u{11A8}", &["\u{378}", "\u{11A8}"]),
2662 ("\u{378}\u{308}\u{11A8}", &["\u{378}\u{308}", "\u{11A8}"]),
2663 ("\u{378}\u{AC00}", &["\u{378}", "\u{AC00}"]),
2664 ("\u{378}\u{308}\u{AC00}", &["\u{378}\u{308}", "\u{AC00}"]),
2665 ("\u{378}\u{AC01}", &["\u{378}", "\u{AC01}"]),
2666 ("\u{378}\u{308}\u{AC01}", &["\u{378}\u{308}", "\u{AC01}"]),
2667 ("\u{378}\u{1F1E6}", &["\u{378}", "\u{1F1E6}"]),
2668 ("\u{378}\u{308}\u{1F1E6}", &["\u{378}\u{308}", "\u{1F1E6}"]),
2669 ("\u{378}\u{378}", &["\u{378}", "\u{378}"]),
2670 ("\u{378}\u{308}\u{378}", &["\u{378}\u{308}", "\u{378}"]),
2671 ("\u{61}\u{1F1E6}\u{62}", &["\u{61}", "\u{1F1E6}", "\u{62}"]),
2672 ("\u{1F1F7}\u{1F1FA}", &["\u{1F1F7}\u{1F1FA}"]),
2673 ("\u{1F1F7}\u{1F1FA}\u{1F1F8}", &["\u{1F1F7}\u{1F1FA}\u{1F1F8}"]),
2674 ("\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}",
2675 &["\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}"]),
2676 ("\u{1F1F7}\u{1F1FA}\u{200B}\u{1F1F8}\u{1F1EA}",
2677 &["\u{1F1F7}\u{1F1FA}", "\u{200B}", "\u{1F1F8}\u{1F1EA}"]),
2678 ("\u{1F1E6}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{1F1E7}\u{1F1E8}"]),
2679 ("\u{1F1E6}\u{200D}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{200D}",
2680 "\u{1F1E7}\u{1F1E8}"]),
2681 ("\u{1F1E6}\u{1F1E7}\u{200D}\u{1F1E8}",
2682 &["\u{1F1E6}\u{1F1E7}\u{200D}", "\u{1F1E8}"]),
2683 ("\u{20}\u{200D}\u{646}", &["\u{20}\u{200D}", "\u{646}"]),
2684 ("\u{646}\u{200D}\u{20}", &["\u{646}\u{200D}", "\u{20}"]),
2687 let test_diff: [(_, &[_], &[_]); 23] = [
2688 ("\u{20}\u{903}", &["\u{20}\u{903}"], &["\u{20}", "\u{903}"]), ("\u{20}\u{308}\u{903}",
2689 &["\u{20}\u{308}\u{903}"], &["\u{20}\u{308}", "\u{903}"]), ("\u{D}\u{308}\u{903}",
2690 &["\u{D}", "\u{308}\u{903}"], &["\u{D}", "\u{308}", "\u{903}"]), ("\u{A}\u{308}\u{903}",
2691 &["\u{A}", "\u{308}\u{903}"], &["\u{A}", "\u{308}", "\u{903}"]), ("\u{1}\u{308}\u{903}",
2692 &["\u{1}", "\u{308}\u{903}"], &["\u{1}", "\u{308}", "\u{903}"]), ("\u{300}\u{903}",
2693 &["\u{300}\u{903}"], &["\u{300}", "\u{903}"]), ("\u{300}\u{308}\u{903}",
2694 &["\u{300}\u{308}\u{903}"], &["\u{300}\u{308}", "\u{903}"]), ("\u{903}\u{903}",
2695 &["\u{903}\u{903}"], &["\u{903}", "\u{903}"]), ("\u{903}\u{308}\u{903}",
2696 &["\u{903}\u{308}\u{903}"], &["\u{903}\u{308}", "\u{903}"]), ("\u{1100}\u{903}",
2697 &["\u{1100}\u{903}"], &["\u{1100}", "\u{903}"]), ("\u{1100}\u{308}\u{903}",
2698 &["\u{1100}\u{308}\u{903}"], &["\u{1100}\u{308}", "\u{903}"]), ("\u{1160}\u{903}",
2699 &["\u{1160}\u{903}"], &["\u{1160}", "\u{903}"]), ("\u{1160}\u{308}\u{903}",
2700 &["\u{1160}\u{308}\u{903}"], &["\u{1160}\u{308}", "\u{903}"]), ("\u{11A8}\u{903}",
2701 &["\u{11A8}\u{903}"], &["\u{11A8}", "\u{903}"]), ("\u{11A8}\u{308}\u{903}",
2702 &["\u{11A8}\u{308}\u{903}"], &["\u{11A8}\u{308}", "\u{903}"]), ("\u{AC00}\u{903}",
2703 &["\u{AC00}\u{903}"], &["\u{AC00}", "\u{903}"]), ("\u{AC00}\u{308}\u{903}",
2704 &["\u{AC00}\u{308}\u{903}"], &["\u{AC00}\u{308}", "\u{903}"]), ("\u{AC01}\u{903}",
2705 &["\u{AC01}\u{903}"], &["\u{AC01}", "\u{903}"]), ("\u{AC01}\u{308}\u{903}",
2706 &["\u{AC01}\u{308}\u{903}"], &["\u{AC01}\u{308}", "\u{903}"]), ("\u{1F1E6}\u{903}",
2707 &["\u{1F1E6}\u{903}"], &["\u{1F1E6}", "\u{903}"]), ("\u{1F1E6}\u{308}\u{903}",
2708 &["\u{1F1E6}\u{308}\u{903}"], &["\u{1F1E6}\u{308}", "\u{903}"]), ("\u{378}\u{903}",
2709 &["\u{378}\u{903}"], &["\u{378}", "\u{903}"]), ("\u{378}\u{308}\u{903}",
2710 &["\u{378}\u{308}\u{903}"], &["\u{378}\u{308}", "\u{903}"]),
2713 for &(s, g) in test_same.iter() {
2714 // test forward iterator
2715 assert!(order::equals(s.graphemes(true), g.iter().map(|&x| x)));
2716 assert!(order::equals(s.graphemes(false), g.iter().map(|&x| x)));
2718 // test reverse iterator
2719 assert!(order::equals(s.graphemes(true).rev(), g.iter().rev().map(|&x| x)));
2720 assert!(order::equals(s.graphemes(false).rev(), g.iter().rev().map(|&x| x)));
2723 for &(s, gt, gf) in test_diff.iter() {
2724 // test forward iterator
2725 assert!(order::equals(s.graphemes(true), gt.iter().map(|&x| x)));
2726 assert!(order::equals(s.graphemes(false), gf.iter().map(|&x| x)));
2728 // test reverse iterator
2729 assert!(order::equals(s.graphemes(true).rev(), gt.iter().rev().map(|&x| x)));
2730 assert!(order::equals(s.graphemes(false).rev(), gf.iter().rev().map(|&x| x)));
2733 // test the indices iterators
2734 let s = "a̐éö̲\r\n";
2735 let gr_inds = s.grapheme_indices(true).collect::<Vec<(uint, &str)>>();
2736 let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
2737 assert_eq!(gr_inds, b);
2738 let gr_inds = s.grapheme_indices(true).rev().collect::<Vec<(uint, &str)>>();
2739 let b: &[_] = &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0u, "a̐")];
2740 assert_eq!(gr_inds, b);
2741 let mut gr_inds_iter = s.grapheme_indices(true);
2743 let gr_inds = gr_inds_iter.by_ref();
2744 let e1 = gr_inds.size_hint();
2745 assert_eq!(e1, (1, Some(13)));
2746 let c = gr_inds.count();
2749 let e2 = gr_inds_iter.size_hint();
2750 assert_eq!(e2, (0, Some(0)));
2752 // make sure the reverse iterator does the right thing with "\n" at beginning of string
2754 let gr = s.graphemes(true).rev().collect::<Vec<&str>>();
2755 let b: &[_] = &["\r", "\r\n", "\n"];
// Exercises `split_str` with a range of inputs: a separator that never
// occurs, separators at the start and/or end of the input, multi-byte text,
// and inputs made only of (possibly overlapping) separator characters.
2760 fn test_split_strator() {
// Helper: collects the pieces yielded by `s.split_str(sep)` into a Vec.
// NOTE(review): `u` is presumably the expected list of pieces; the line that
// compares `v` against `u` is not visible in this listing -- confirm.
2761 fn t(s: &str, sep: &str, u: &[&str]) {
2762 let v: Vec<&str> = s.split_str(sep).collect();
// Separator absent: the listed result is the whole input as a single piece.
2765 t("--1233345--", "12345", &["--1233345--"]);
// Separator in the middle, at the start, at the end, and at both ends; the
// listed results contain an empty piece for each leading/trailing separator.
2766 t("abc::hello::there", "::", &["abc", "hello", "there"]);
2767 t("::hello::there", "::", &["", "hello", "there"]);
2768 t("hello::there::", "::", &["hello", "there", ""]);
2769 t("::hello::there::", "::", &["", "hello", "there", ""]);
// Multi-byte separator inside multi-byte text.
2770 t("ประเทศไทย中华Việt Nam", "中华", &["ประเทศไทย", "Việt Nam"]);
2771 t("zzXXXzzYYYzz", "zz", &["", "XXX", "YYY", ""]);
2772 t("zzXXXzYYYz", "XXX", &["zz", "zYYYz"]);
2773 t(".XXX.YYY.", ".", &["", "XXX", "YYY", ""]);
// Inputs built from the separator characters themselves: the listed results
// correspond to non-overlapping matches taken from left to right, with any
// leftover `z` ending up in the final piece.
2775 t("zz", "zz", &["",""]);
2776 t("ok", "z", &["ok"]);
2777 t("zzz", "zz", &["","z"]);
2778 t("zzzzz", "zz", &["","","z"]);
// Checks that the `Default` value of a string-like type is the empty string.
// NOTE(review): the concrete types `t` is instantiated with are not visible
// in this listing -- confirm against the full file.
2782 fn test_str_default() {
2783 use core::default::Default;
// Generic helper: builds `S` via `Default::default()` and asserts that its
// `as_slice()` view equals the empty string.
2784 fn t<S: Default + Str>() {
2785 let s: S = Default::default();
2786 assert_eq!(s.as_slice(), "");
// Checks that `len()` summed over several string slices gives the same total
// (5) whether the slices come from literals, temporary `String`s, or a single
// owned `String`.
2794 fn test_str_container() {
// Helper: adds up `len()` of every slice in `v`.
2795 fn sum_len(v: &[&str]) -> uint {
2796 v.iter().map(|x| x.len()).sum()
2799 let s = String::from_str("01234");
// Literal slices, including an empty one that contributes nothing.
2800 assert_eq!(5, sum_len(&["012", "", "34"]));
// Slices borrowed from temporary `String` values via `as_slice()`.
2801 assert_eq!(5, sum_len(&[String::from_str("01").as_slice(),
2802 String::from_str("2").as_slice(),
2803 String::from_str("34").as_slice(),
2804 String::from_str("").as_slice()]));
// A single slice covering the whole owned string.
2805 assert_eq!(5, sum_len(&[s.as_slice()]));
// Checks `from_utf8` on two valid inputs and one invalid input.
2809 fn test_str_from_utf8() {
// NOTE(review): the binding of `xs` used by this first assertion is not
// visible in this listing; presumably the bytes of `hello` -- confirm.
2811 assert_eq!(from_utf8(xs), Ok("hello"));
// Multi-byte text round-trips through `as_bytes()` and `from_utf8`.
2813 let xs = "ศไทย中华Việt Nam".as_bytes();
2814 assert_eq!(from_utf8(xs), Ok("ศไทย中华Việt Nam"));
// A trailing 0xFF byte makes the input invalid; the expected error variant
// asserted here is `Utf8Error::TooShort`.
2816 let xs = b"hello\xFF";
2817 assert_eq!(from_utf8(xs), Err(Utf8Error::TooShort));
2824 use prelude::{SliceExt, IteratorExt, SliceConcatExt};
2826 use test::black_box;
// Benchmark: counts the chars of a string mixing multi-byte and ASCII text
// by running `chars().count()` inside `b.iter`.
2829 fn char_iterator(b: &mut Bencher) {
2830 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2832 b.iter(|| s.chars().count());
// Benchmark variant that walks `s.chars()` with an explicit `for` loop and
// hands every char to `black_box`.
// NOTE(review): the `b.iter` wrapper around the loop is not visible in this
// listing -- confirm against the full file.
2836 fn char_iterator_for(b: &mut Bencher) {
2837 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2840 for ch in s.chars() { black_box(ch) }
// Benchmark: counts the chars of a multi-line, ASCII-only string literal by
// running `chars().count()` inside `b.iter`.
2845 fn char_iterator_ascii(b: &mut Bencher) {
2846 let s = "Mary had a little lamb, Little lamb
2847 Mary had a little lamb, Little lamb
2848 Mary had a little lamb, Little lamb
2849 Mary had a little lamb, Little lamb
2850 Mary had a little lamb, Little lamb
2851 Mary had a little lamb, Little lamb";
2853 b.iter(|| s.chars().count());
// Benchmark: counts the chars of a mixed multi-byte/ASCII string while
// iterating from the back (`chars().rev().count()`).
2857 fn char_iterator_rev(b: &mut Bencher) {
2858 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2860 b.iter(|| s.chars().rev().count());
// Benchmark variant that walks `s.chars().rev()` with an explicit `for` loop
// and hands every char to `black_box`.
// NOTE(review): the `b.iter` wrapper around the loop is not visible in this
// listing -- confirm against the full file.
2864 fn char_iterator_rev_for(b: &mut Bencher) {
2865 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2868 for ch in s.chars().rev() { black_box(ch) }
// Benchmark: iterates `char_indices()` and asserts that it yields as many
// items as `chars()` does.
2873 fn char_indicesator(b: &mut Bencher) {
2874 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Reference count, computed once outside the measured closure.
2875 let len = s.chars().count();
2877 b.iter(|| assert_eq!(s.char_indices().count(), len));
// Benchmark: iterates `char_indices()` from the back and asserts that it
// yields as many items as `chars()` does.
2881 fn char_indicesator_rev(b: &mut Bencher) {
2882 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Reference count, computed once outside the measured closure.
2883 let len = s.chars().count();
2885 b.iter(|| assert_eq!(s.char_indices().rev().count(), len));
// Benchmark: splits a mostly multi-byte string on the ASCII char `V`. The
// literal contains `V` twice, so three pieces are expected.
2889 fn split_unicode_ascii(b: &mut Bencher) {
2890 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2892 b.iter(|| assert_eq!(s.split('V').count(), 3));
// Benchmark: same split as `s.split('V')` on a mostly multi-byte string, but
// driven through a custom `CharEq` implementation instead of a bare char.
2896 fn split_unicode_not_ascii(b: &mut Bencher) {
// Wrapper around a single char used as the split matcher.
2897 struct NotAscii(char);
2898 impl CharEq for NotAscii {
// NOTE(review): only the destructuring of `self` is visible in this listing;
// the comparison against `c` is presumably on an elided line -- confirm.
2899 fn matches(&mut self, c: char) -> bool {
2900 let NotAscii(cc) = *self;
// Always reports false.
// NOTE(review): presumably this keeps `split` off an ASCII-only code path,
// which is what the benchmark name suggests -- confirm.
2903 fn only_ascii(&self) -> bool { false }
2905 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
// The literal contains `V` twice, so three pieces are expected.
2907 b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
// Benchmark: splits an ASCII sentence on a space char and asserts the piece
// count matches a reference count.
2912 fn split_ascii(b: &mut Bencher) {
2913 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference count, computed once outside the measured closure.
2914 let len = s.split(' ').count();
2916 b.iter(|| assert_eq!(s.split(' ').count(), len));
// Benchmark: splits an ASCII sentence on a space, but through a custom
// `CharEq` implementation, and asserts the piece count matches the count
// obtained with a plain char separator.
2920 fn split_not_ascii(b: &mut Bencher) {
// Wrapper around a single char used as the split matcher.
2921 struct NotAscii(char);
2922 impl CharEq for NotAscii {
// NOTE(review): only the destructuring of `self` is visible in this listing;
// the comparison against `c` is presumably on an elided line -- confirm.
2924 fn matches(&mut self, c: char) -> bool {
2925 let NotAscii(cc) = *self;
// Always reports false.
// NOTE(review): presumably this keeps `split` off an ASCII-only code path,
// which is what the benchmark name suggests -- confirm.
2928 fn only_ascii(&self) -> bool { false }
2930 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference count from the plain char separator, computed once up front.
2931 let len = s.split(' ').count();
2933 b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
// Benchmark: splits an ASCII sentence using a named function as the
// separator predicate and asserts the piece count matches the count obtained
// with a plain char separator.
2937 fn split_extern_fn(b: &mut Bencher) {
2938 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference count from the plain char separator, computed once up front.
2939 let len = s.split(' ').count();
// Predicate passed to `split`: true exactly for the space char.
2940 fn pred(c: char) -> bool { c == ' ' }
2942 b.iter(|| assert_eq!(s.split(pred).count(), len));
// Benchmark: splits an ASCII sentence using an inline closure as the
// separator predicate and asserts the piece count matches the count obtained
// with a plain char separator.
2946 fn split_closure(b: &mut Bencher) {
2947 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference count from the plain char separator, computed once up front.
2948 let len = s.split(' ').count();
2950 b.iter(|| assert_eq!(s.split(|&: c: char| c == ' ').count(), len));
// Benchmark: splits an ASCII sentence using a slice of chars as the separator
// set and asserts the piece count matches the count obtained with a plain
// char separator.
2954 fn split_slice(b: &mut Bencher) {
2955 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference count from the plain char separator, computed once up front.
2956 let len = s.split(' ').count();
// One-element separator set containing only the space char.
2958 let c: &[char] = &[' '];
2959 b.iter(|| assert_eq!(s.split(c).count(), len));
// Benchmark for `connect`: joins ten copies of `s` with `sep` and checks the
// length of the result.
// NOTE(review): the binding of `sep` and the `b.iter` wrapper are not visible
// in this listing -- confirm against the full file.
2963 fn bench_connect(b: &mut Bencher) {
2964 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2966 let v = vec![s, s, s, s, s, s, s, s, s, s];
// Ten pieces and nine separators between them.
2968 assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
// Benchmark for `contains` on a short haystack; asserts the needle is found.
// NOTE(review): the binding of `needle` and the `b.iter` wrapper are not
// visible in this listing -- confirm against the full file.
2973 fn bench_contains_short_short(b: &mut Bencher) {
2974 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2978 assert!(haystack.contains(needle));
// Benchmark for `contains` with a short needle (`english`) and a long,
// multi-paragraph haystack; the final assertion expects the needle NOT to be
// found.
// NOTE(review): the line that opens the `haystack` literal and the `b.iter`
// wrapper are not visible in this listing -- confirm against the full file.
2983 fn bench_contains_short_long(b: &mut Bencher) {
2985 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
2986 ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
2987 eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
2988 sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
2989 tempus vel, gravida nec quam.
2991 In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
2992 sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
2993 diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
2994 lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
2995 eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
2996 interdum. Curabitur ut nisi justo.
2998 Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
2999 mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
3000 lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
3001 est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
3002 felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
3003 ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
3004 feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
3005 Aliquam sit amet placerat lorem.
3007 Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
3008 mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
3009 Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
3010 lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
3011 suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
3014 Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
3015 feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
3016 vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
3017 leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
3018 malesuada sollicitudin quam eu fermentum.";
3019 let needle = "english";
3022 assert!(!haystack.contains(needle));
// Benchmark for `contains` where the haystack is a long run of `a` and the
// needle is a shorter run of `a` ending in `b`; the needle is asserted to be
// absent.
// NOTE(review): judging by the name, this is presumably meant as a bad case
// for a naive substring search -- confirm. The `b.iter` wrapper is not
// visible in this listing.
3027 fn bench_contains_bad_naive(b: &mut Bencher) {
3028 let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
3029 let needle = "aaaaaaaab";
3032 assert!(!haystack.contains(needle));
// Benchmark for `contains` where the needle is identical to the haystack;
// the needle is asserted to be found.
// NOTE(review): the `b.iter` wrapper and the closing lines of this function
// are not visible in this listing -- confirm against the full file.
3037 fn bench_contains_equal(b: &mut Bencher) {
3038 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3039 let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3042 assert!(haystack.contains(needle));