1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 // ignore-lexer-test FIXME #15679
13 //! Unicode string manipulation (`str` type)
17 //! Rust's string type is one of the core primitive types of the language. While
18 //! represented by the name `str`, the name `str` is not actually a valid type in
19 //! Rust. Each string must also be decorated with a pointer. `String` is used
20 //! for an owned string, so there is only one commonly-used `str` type in Rust:
23 //! `&str` is the borrowed string type. This type of string can only be created
24 //! from other strings, unless it is a static string (see below). As the word
25 //! "borrowed" implies, this type of string is owned elsewhere, and this string
26 //! cannot be moved out of.
28 //! As an example, here's some code that uses a string.
32 //! let borrowed_string = "This string is borrowed with the 'static lifetime";
36 //! From the example above, you can guess that Rust's string literals have the
37 //! `'static` lifetime. This is akin to C's concept of a static string.
38 //! More precisely, string literals are immutable views with a 'static lifetime
39 //! (otherwise known as the lifetime of the entire program), and thus have the
40 //! type `&'static str`.
44 //! Rust's string type, `str`, is a sequence of Unicode scalar values encoded as a
45 //! stream of UTF-8 bytes. All [strings](../../reference.html#literals) are
46 //! guaranteed to be validly encoded UTF-8 sequences. Additionally, strings are
47 //! not null-terminated and can thus contain null bytes.
49 //! The actual representation of strings have direct mappings to slices: `&str`
50 //! is the same as `&[u8]`.
52 #![doc(primitive = "str")]
55 use self::RecompositionState::*;
56 use self::DecompositionType::*;
58 use core::borrow::{BorrowFrom, ToOwned};
59 use core::char::CharExt;
60 use core::clone::Clone;
61 use core::iter::AdditiveIterator;
62 use core::iter::{range, Iterator, IteratorExt};
63 use core::kinds::Sized;
65 use core::option::Option::{self, Some, None};
66 use core::slice::AsSlice;
67 use core::str as core_str;
68 use unicode::str::{UnicodeStr, Utf16Encoder};
70 use ring_buf::RingBuf;
75 use slice::SliceConcatExt;
77 pub use core::str::{FromStr, Utf8Error, Str};
78 pub use core::str::{Lines, LinesAny, MatchIndices, SplitStr, CharRange};
79 pub use core::str::{Split, SplitTerminator};
80 pub use core::str::{SplitN, RSplitN};
81 pub use core::str::{from_utf8, CharEq, Chars, CharIndices, Bytes};
82 pub use core::str::{from_utf8_unchecked, from_c_str};
83 pub use unicode::str::{Words, Graphemes, GraphemeIndices};
86 Section: Creating a string
89 impl<S: Str> SliceConcatExt<str, String> for [S] {
// NOTE(review): this impl is a sampled fragment — original lines 92-96,
// 100-107, 110-119, 126-130, 132 and the closing braces are elided from
// this view. Comments below annotate only the visible lines.
90 fn concat(&self) -> String {
91 let s = self.as_slice();
97 // `len` calculation may overflow but push_str will check boundaries
98 let len = s.iter().map(|s| s.as_slice().len()).sum();
// Pre-size the result so the appends below don't reallocate.
99 let mut result = String::with_capacity(len);
102 result.push_str(s.as_slice())
108 fn connect(&self, sep: &str) -> String {
109 let s = self.as_slice();
// Early-out (presumably for the empty-slice case; the guard's condition
// is on an elided line).
112 return String::new();
120 // this is wrong without the guarantee that `self` is non-empty
121 // `len` calculation may overflow but push_str will check boundaries
// Capacity = separators between n elements (n-1 of them) + total
// element length.
122 let len = sep.len() * (s.len() - 1)
123 + s.iter().map(|s| s.as_slice().len()).sum();
124 let mut result = String::with_capacity(len);
// `first` suppresses the separator before the first element.
125 let mut first = true;
131 result.push_str(sep);
133 result.push_str(s.as_slice());
143 // Helper functions used for Unicode normalization
144 fn canonical_sort(comb: &mut [(char, u8)]) {
145 let len = comb.len();
146 for i in range(0, len) {
147 let mut swapped = false;
148 for j in range(1, len-i) {
149 let class_a = comb[j-1].1;
150 let class_b = comb[j].1;
151 if class_a != 0 && class_b != 0 && class_a > class_b {
156 if !swapped { break; }
// Which Unicode decomposition to apply (the variants, original lines
// 162-164, are elided from this view; `decompose_canonical` /
// `decompose_compatible` calls appear in the Iterator impl below).
161 enum DecompositionType {
166 /// External iterator for a string's decomposition's characters.
167 /// Use with the `std::iter` module.
170 pub struct Decompositions<'a> {
// Selects canonical (NFD) vs. compatibility (NFKD) decomposition.
171 kind: DecompositionType,
// Pending decomposed characters paired with their combining classes.
173 buffer: Vec<(char, u8)>,
// NOTE(review): further fields (`iter`, `sorted` — both referenced by
// the Iterator impl) and the closing brace are elided from this view.
178 impl<'a> Iterator for Decompositions<'a> {
// NOTE(review): this impl is a sampled fragment — the `type Item`
// declaration, several match arms, loop closers, and closing braces are
// elided. Comments annotate only the visible lines.
182 fn next(&mut self) -> Option<char> {
// Drain already-sorted characters from the front of the buffer first.
183 match self.buffer.first() {
186 self.buffer.remove(0);
189 Some(&(c, _)) if self.sorted => {
190 self.buffer.remove(0);
193 _ => self.sorted = false
// Decompose upcoming input characters, sorting pending combining marks
// into canonical order whenever a starter (class 0) is reached.
// NOTE(review): `Vec::remove(0)` above is O(n); a RingBuf/VecDeque would
// avoid the shift — flagged only, not changed.
197 for ch in self.iter {
198 let buffer = &mut self.buffer;
199 let sorted = &mut self.sorted;
201 let callback = |&mut: d| {
203 unicode::char::canonical_combining_class(d);
204 if class == 0 && !*sorted {
205 canonical_sort(buffer.as_mut_slice());
208 buffer.push((d, class));
212 unicode::char::decompose_canonical(ch, callback)
215 unicode::char::decompose_compatible(ch, callback)
// End of input: sort whatever combining marks remain buffered.
226 canonical_sort(self.buffer.as_mut_slice());
230 if self.buffer.is_empty() {
233 match self.buffer.remove(0) {
// Decomposition can only lengthen the stream, so the source's lower
// bound is a valid lower bound here.
243 fn size_hint(&self) -> (uint, Option<uint>) {
244 let (lower, _) = self.iter.size_hint();
// States of the recomposition state machine (variants — original lines
// 251-255 — are elided; `Composing`, `Purging` and `Finished` are
// referenced by the Iterator impl below).
250 enum RecompositionState {
256 /// External iterator for a string's recomposition's characters.
257 /// Use with the `std::iter` module.
260 pub struct Recompositions<'a> {
// Source of canonically-decomposed characters to recompose.
261 iter: Decompositions<'a>,
262 state: RecompositionState,
// Combining marks waiting to be emitted once blocked from composing.
263 buffer: RingBuf<char>,
// Current composition candidate (the starter being built up).
264 composee: Option<char>,
// NOTE(review): the `last_ccc` field used by the Iterator impl, and the
// closing brace, are elided from this view.
269 impl<'a> Iterator for Recompositions<'a> {
// Canonical composition: pairs the pending starter (`composee`) with
// following combining marks via `unicode::char::compose`, buffering
// marks that fail to compose and purging the buffer between starters.
// NOTE(review): heavily sampled fragment — match/loop headers, several
// arms, and closing braces are elided; comments mark only what the
// visible lines establish.
273 fn next(&mut self) -> Option<char> {
277 for ch in self.iter {
278 let ch_class = unicode::char::canonical_combining_class(ch);
// No candidate yet: the first character becomes the candidate.
279 if self.composee.is_none() {
283 self.composee = Some(ch);
286 let k = self.composee.clone().unwrap()//! ;
288 match self.last_ccc {
// No prior combining class recorded: try to compose directly.
290 match unicode::char::compose(k, ch) {
292 self.composee = Some(r);
297 self.composee = Some(ch);
300 self.buffer.push_back(ch);
301 self.last_ccc = Some(ch_class);
// Equal-or-higher preceding class blocks `ch` from the candidate
// (UAX #15 blocking rule): flush buffered marks, start over.
306 if l_class >= ch_class {
307 // `ch` is blocked from `composee`
309 self.composee = Some(ch);
310 self.last_ccc = None;
311 self.state = Purging;
314 self.buffer.push_back(ch);
315 self.last_ccc = Some(ch_class);
318 match unicode::char::compose(k, ch) {
320 self.composee = Some(r);
324 self.buffer.push_back(ch);
325 self.last_ccc = Some(ch_class);
// Input exhausted: emit the final candidate, then drain the buffer.
331 self.state = Finished;
332 if self.composee.is_some() {
333 return self.composee.take();
// Purging: emit buffered marks until empty, then resume composing.
337 match self.buffer.pop_front() {
338 None => self.state = Composing,
343 match self.buffer.pop_front() {
344 None => return self.composee.take(),
353 /// External iterator for a string's UTF16 codeunits.
354 /// Use with the `std::iter` module.
357 pub struct Utf16Units<'a> {
// Wraps the string's char iterator; does the actual UTF-16 encoding.
358 encoder: Utf16Encoder<Chars<'a>>
// NOTE(review): struct's closing brace is elided from this view.
362 impl<'a> Iterator for Utf16Units<'a> {
// Both methods delegate straight to the wrapped Utf16Encoder.
366 fn next(&mut self) -> Option<u16> { self.encoder.next() }
369 fn size_hint(&self) -> (uint, Option<uint>) { self.encoder.size_hint() }
376 // Return the initial codepoint accumulator for the first byte.
377 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
378 // for width 3, and 3 bits for width 4
379 macro_rules! utf8_first_byte {
// `0x7F >> $width` keeps the low (7 - width) bits, masking off the
// UTF-8 length-prefix bits of the leading byte.
380 ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
383 // return the value of $ch updated with continuation byte $byte
384 macro_rules! utf8_acc_cont_byte {
// Shift the accumulator left 6 bits and OR in the continuation byte's
// low 6 payload bits (`& 63` strips the 0b10 marker).
385 ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
388 #[unstable = "trait is unstable"]
// Borrow a full-range `&str` view out of an owned `String`.
389 impl BorrowFrom<String> for str {
390 fn borrow_from(owned: &String) -> &str { owned[] }
393 #[unstable = "trait is unstable"]
// Clone a `&str` into an owned `String`.
394 impl ToOwned<String> for str {
395 fn to_owned(&self) -> String {
// SAFETY (review): `self` is already guaranteed-valid UTF-8, so its
// copied bytes can skip re-validation. The enclosing `unsafe` block
// (original line 396) is elided from this view.
397 String::from_utf8_unchecked(self.as_bytes().to_owned())
407 Section: Trait implementations
410 /// Any string that can be represented as a slice.
412 pub trait StrExt for Sized?: ops::Slice<uint, str> {
413 /// Escapes each char in `s` with `char::escape_default`.
414 #[unstable = "return type may change to be an iterator"]
415 fn escape_default(&self) -> String {
416 self.chars().flat_map(|c| c.escape_default()).collect()
419 /// Escapes each char in `s` with `char::escape_unicode`.
420 #[unstable = "return type may change to be an iterator"]
421 fn escape_unicode(&self) -> String {
422 self.chars().flat_map(|c| c.escape_unicode()).collect()
425 /// Replaces all occurrences of one string with another.
429 /// * `from` - The string to replace
430 /// * `to` - The replacement string
434 /// The original string with all occurrences of `from` replaced with `to`.
439 /// let s = "Do you know the muffin man,
440 /// The muffin man, the muffin man, ...".to_string();
442 /// assert_eq!(s.replace("muffin man", "little lamb"),
443 /// "Do you know the little lamb,
444 /// The little lamb, the little lamb, ...".to_string());
446 /// // not found, so no change.
447 /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
450 fn replace(&self, from: &str, to: &str) -> String {
451 let mut result = String::new();
452 let mut last_end = 0;
453 for (start, end) in self.match_indices(from) {
454 result.push_str(unsafe { self.slice_unchecked(last_end, start) });
458 result.push_str(unsafe { self.slice_unchecked(last_end, self.len()) });
462 /// Returns an iterator over the string in Unicode Normalization Form D
463 /// (canonical decomposition).
465 #[unstable = "this functionality may be moved to libunicode"]
466 fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
468 iter: self[].chars(),
475 /// Returns an iterator over the string in Unicode Normalization Form KD
476 /// (compatibility decomposition).
478 #[unstable = "this functionality may be moved to libunicode"]
479 fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
481 iter: self[].chars(),
488 /// An Iterator over the string in Unicode Normalization Form C
489 /// (canonical decomposition followed by canonical composition).
491 #[unstable = "this functionality may be moved to libunicode"]
492 fn nfc_chars<'a>(&'a self) -> Recompositions<'a> {
494 iter: self.nfd_chars(),
496 buffer: RingBuf::new(),
502 /// An Iterator over the string in Unicode Normalization Form KC
503 /// (compatibility decomposition followed by canonical composition).
505 #[unstable = "this functionality may be moved to libunicode"]
506 fn nfkc_chars<'a>(&'a self) -> Recompositions<'a> {
508 iter: self.nfkd_chars(),
510 buffer: RingBuf::new(),
516 /// Returns true if a string contains a string pattern.
520 /// - pat - The string pattern to look for
525 /// assert!("bananas".contains("nana"));
528 fn contains(&self, pat: &str) -> bool {
529 core_str::StrExt::contains(self[], pat)
532 /// Returns true if a string contains a char pattern.
536 /// - pat - The char pattern to look for
541 /// assert!("hello".contains_char('e'));
543 #[unstable = "might get removed in favour of a more generic contains()"]
544 fn contains_char<P: CharEq>(&self, pat: P) -> bool {
545 core_str::StrExt::contains_char(self[], pat)
548 /// An iterator over the characters of `self`. Note, this iterates
549 /// over Unicode code-points, not Unicode graphemes.
554 /// let v: Vec<char> = "abc åäö".chars().collect();
555 /// assert_eq!(v, vec!['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
558 fn chars(&self) -> Chars {
559 core_str::StrExt::chars(self[])
562 /// An iterator over the bytes of `self`
567 /// let v: Vec<u8> = "bors".bytes().collect();
568 /// assert_eq!(v, b"bors".to_vec());
571 fn bytes(&self) -> Bytes {
572 core_str::StrExt::bytes(self[])
575 /// An iterator over the characters of `self` and their byte offsets.
577 fn char_indices(&self) -> CharIndices {
578 core_str::StrExt::char_indices(self[])
581 /// An iterator over substrings of `self`, separated by characters
582 /// matched by the pattern `pat`.
587 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect();
588 /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
590 /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).collect();
591 /// assert_eq!(v, vec!["abc", "def", "ghi"]);
593 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
594 /// assert_eq!(v, vec!["lion", "", "tiger", "leopard"]);
596 /// let v: Vec<&str> = "".split('X').collect();
597 /// assert_eq!(v, vec![""]);
600 fn split<P: CharEq>(&self, pat: P) -> Split<P> {
601 core_str::StrExt::split(self[], pat)
604 /// An iterator over substrings of `self`, separated by characters
605 /// matched by the pattern `pat`, restricted to splitting at most `count`
611 /// let v: Vec<&str> = "Mary had a little lambda".splitn(2, ' ').collect();
612 /// assert_eq!(v, vec!["Mary", "had", "a little lambda"]);
614 /// let v: Vec<&str> = "abc1def2ghi".splitn(1, |&: c: char| c.is_numeric()).collect();
615 /// assert_eq!(v, vec!["abc", "def2ghi"]);
617 /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(2, 'X').collect();
618 /// assert_eq!(v, vec!["lion", "", "tigerXleopard"]);
620 /// let v: Vec<&str> = "abcXdef".splitn(0, 'X').collect();
621 /// assert_eq!(v, vec!["abcXdef"]);
623 /// let v: Vec<&str> = "".splitn(1, 'X').collect();
624 /// assert_eq!(v, vec![""]);
627 fn splitn<P: CharEq>(&self, count: uint, pat: P) -> SplitN<P> {
628 core_str::StrExt::splitn(self[], count, pat)
631 /// An iterator over substrings of `self`, separated by characters
632 /// matched by the pattern `pat`.
634 /// Equivalent to `split`, except that the trailing substring
635 /// is skipped if empty (terminator semantics).
640 /// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
641 /// assert_eq!(v, vec!["A", "B"]);
643 /// let v: Vec<&str> = "A..B..".split_terminator('.').collect();
644 /// assert_eq!(v, vec!["A", "", "B", ""]);
646 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').rev().collect();
647 /// assert_eq!(v, vec!["lamb", "little", "a", "had", "Mary"]);
649 /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).rev().collect();
650 /// assert_eq!(v, vec!["ghi", "def", "abc"]);
652 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').rev().collect();
653 /// assert_eq!(v, vec!["leopard", "tiger", "", "lion"]);
655 #[unstable = "might get removed"]
656 fn split_terminator<P: CharEq>(&self, pat: P) -> SplitTerminator<P> {
657 core_str::StrExt::split_terminator(self[], pat)
660 /// An iterator over substrings of `self`, separated by characters
661 /// matched by the pattern `pat`, starting from the end of the string.
662 /// Restricted to splitting at most `count` times.
667 /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(2, ' ').collect();
668 /// assert_eq!(v, vec!["lamb", "little", "Mary had a"]);
670 /// let v: Vec<&str> = "abc1def2ghi".rsplitn(1, |&: c: char| c.is_numeric()).collect();
671 /// assert_eq!(v, vec!["ghi", "abc1def"]);
673 /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(2, 'X').collect();
674 /// assert_eq!(v, vec!["leopard", "tiger", "lionX"]);
677 fn rsplitn<P: CharEq>(&self, count: uint, pat: P) -> RSplitN<P> {
678 core_str::StrExt::rsplitn(self[], count, pat)
681 /// An iterator over the start and end indices of the disjoint
682 /// matches of the pattern `pat` within `self`.
684 /// That is, each returned value `(start, end)` satisfies
685 /// `self.slice(start, end) == sep`. For matches of `sep` within
686 /// `self` that overlap, only the indices corresponding to the
687 /// first match are returned.
692 /// let v: Vec<(uint, uint)> = "abcXXXabcYYYabc".match_indices("abc").collect();
693 /// assert_eq!(v, vec![(0,3), (6,9), (12,15)]);
695 /// let v: Vec<(uint, uint)> = "1abcabc2".match_indices("abc").collect();
696 /// assert_eq!(v, vec![(1,4), (4,7)]);
698 /// let v: Vec<(uint, uint)> = "ababa".match_indices("aba").collect();
699 /// assert_eq!(v, vec![(0, 3)]); // only the first `aba`
701 #[unstable = "might have its iterator type changed"]
702 fn match_indices<'a>(&'a self, pat: &'a str) -> MatchIndices<'a> {
703 core_str::StrExt::match_indices(self[], pat)
706 /// An iterator over the substrings of `self` separated by the pattern `sep`.
711 /// let v: Vec<&str> = "abcXXXabcYYYabc".split_str("abc").collect();
712 /// assert_eq!(v, vec!["", "XXX", "YYY", ""]);
714 /// let v: Vec<&str> = "1abcabc2".split_str("abc").collect();
715 /// assert_eq!(v, vec!["1", "", "2"]);
717 #[unstable = "might get removed in the future in favor of a more generic split()"]
718 fn split_str<'a>(&'a self, pat: &'a str) -> SplitStr<'a> {
719 core_str::StrExt::split_str(self[], pat)
722 /// An iterator over the lines of a string (subsequences separated
723 /// by `\n`). This does not include the empty string after a
729 /// let four_lines = "foo\nbar\n\nbaz\n";
730 /// let v: Vec<&str> = four_lines.lines().collect();
731 /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
734 fn lines(&self) -> Lines {
735 core_str::StrExt::lines(self[])
738 /// An iterator over the lines of a string, separated by either
739 /// `\n` or `\r\n`. As with `.lines()`, this does not include an
740 /// empty trailing line.
745 /// let four_lines = "foo\r\nbar\n\r\nbaz\n";
746 /// let v: Vec<&str> = four_lines.lines_any().collect();
747 /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
750 fn lines_any(&self) -> LinesAny {
751 core_str::StrExt::lines_any(self[])
754 /// Returns a slice of the given string from the byte range
755 /// [`begin`..`end`).
757 /// This operation is `O(1)`.
759 /// Panics when `begin` and `end` do not point to valid characters
760 /// or point beyond the last character of the string.
762 /// See also `slice_to` and `slice_from` for slicing prefixes and
763 /// suffixes of strings, and `slice_chars` for slicing based on
764 /// code point counts.
769 /// let s = "Löwe 老虎 Léopard";
770 /// assert_eq!(s.slice(0, 1), "L");
772 /// assert_eq!(s.slice(1, 9), "öwe 老");
774 /// // these will panic:
775 /// // byte 2 lies within `ö`:
776 /// // s.slice(2, 3);
778 /// // byte 8 lies within `老`
779 /// // s.slice(1, 8);
781 /// // byte 100 is outside the string
782 /// // s.slice(3, 100);
784 #[unstable = "use slice notation [a..b] instead"]
785 fn slice(&self, begin: uint, end: uint) -> &str {
786 core_str::StrExt::slice(self[], begin, end)
789 /// Returns a slice of the string from `begin` to its end.
791 /// Equivalent to `self.slice(begin, self.len())`.
793 /// Panics when `begin` does not point to a valid character, or is
796 /// See also `slice`, `slice_to` and `slice_chars`.
797 #[unstable = "use slice notation [a..] instead"]
798 fn slice_from(&self, begin: uint) -> &str {
799 core_str::StrExt::slice_from(self[], begin)
802 /// Returns a slice of the string from the beginning to byte
805 /// Equivalent to `self.slice(0, end)`.
807 /// Panics when `end` does not point to a valid character, or is
810 /// See also `slice`, `slice_from` and `slice_chars`.
811 #[unstable = "use slice notation [0..a] instead"]
812 fn slice_to(&self, end: uint) -> &str {
813 core_str::StrExt::slice_to(self[], end)
816 /// Returns a slice of the string from the character range
817 /// [`begin`..`end`).
819 /// That is, start at the `begin`-th code point of the string and
820 /// continue to the `end`-th code point. This does not detect or
821 /// handle edge cases such as leaving a combining character as the
822 /// first code point of the string.
824 /// Due to the design of UTF-8, this operation is `O(end)`.
825 /// See `slice`, `slice_to` and `slice_from` for `O(1)`
826 /// variants that use byte indices rather than code point
829 /// Panics if `begin` > `end` or the either `begin` or `end` are
830 /// beyond the last character of the string.
835 /// let s = "Löwe 老虎 Léopard";
836 /// assert_eq!(s.slice_chars(0, 4), "Löwe");
837 /// assert_eq!(s.slice_chars(5, 7), "老虎");
839 #[unstable = "may have yet to prove its worth"]
840 fn slice_chars(&self, begin: uint, end: uint) -> &str {
841 core_str::StrExt::slice_chars(self[], begin, end)
844 /// Takes a bytewise (not UTF-8) slice from a string.
846 /// Returns the substring from [`begin`..`end`).
848 /// Caller must check both UTF-8 character boundaries and the boundaries of
849 /// the entire slice as well.
851 unsafe fn slice_unchecked(&self, begin: uint, end: uint) -> &str {
852 core_str::StrExt::slice_unchecked(self[], begin, end)
855 /// Returns true if the pattern `pat` is a prefix of the string.
860 /// assert!("banana".starts_with("ba"));
863 fn starts_with(&self, pat: &str) -> bool {
864 core_str::StrExt::starts_with(self[], pat)
867 /// Returns true if the pattern `pat` is a suffix of the string.
872 /// assert!("banana".ends_with("nana"));
875 fn ends_with(&self, pat: &str) -> bool {
876 core_str::StrExt::ends_with(self[], pat)
879 /// Returns a string with all pre- and suffixes that match
880 /// the pattern `pat` repeatedly removed.
884 /// * pat - a string pattern
889 /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
890 /// let x: &[_] = &['1', '2'];
891 /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar");
892 /// assert_eq!("123foo1bar123".trim_matches(|&: c: char| c.is_numeric()), "foo1bar");
895 fn trim_matches<P: CharEq>(&self, pat: P) -> &str {
896 core_str::StrExt::trim_matches(self[], pat)
899 /// Returns a string with all prefixes that match
900 /// the pattern `pat` repeatedly removed.
904 /// * pat - a string pattern
909 /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
910 /// let x: &[_] = &['1', '2'];
911 /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12");
912 /// assert_eq!("123foo1bar123".trim_left_matches(|&: c: char| c.is_numeric()), "foo1bar123");
915 fn trim_left_matches<P: CharEq>(&self, pat: P) -> &str {
916 core_str::StrExt::trim_left_matches(self[], pat)
919 /// Returns a string with all suffixes that match
920 /// the pattern `pat` repeatedly removed.
924 /// * pat - a string pattern
929 /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
930 /// let x: &[_] = &['1', '2'];
931 /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar");
932 /// assert_eq!("123foo1bar123".trim_right_matches(|&: c: char| c.is_numeric()), "123foo1bar");
935 fn trim_right_matches<P: CharEq>(&self, pat: P) -> &str {
936 core_str::StrExt::trim_right_matches(self[], pat)
939 /// Check that `index`-th byte lies at the start and/or end of a
940 /// UTF-8 code point sequence.
942 /// The start and end of the string (when `index == self.len()`)
943 /// are considered to be boundaries.
945 /// Panics if `index` is greater than `self.len()`.
950 /// let s = "Löwe 老虎 Léopard";
951 /// assert!(s.is_char_boundary(0));
953 /// assert!(s.is_char_boundary(6));
954 /// assert!(s.is_char_boundary(s.len()));
956 /// // second byte of `ö`
957 /// assert!(!s.is_char_boundary(2));
959 /// // third byte of `老`
960 /// assert!(!s.is_char_boundary(8));
962 #[unstable = "naming is uncertain with container conventions"]
963 fn is_char_boundary(&self, index: uint) -> bool {
964 core_str::StrExt::is_char_boundary(self[], index)
967 /// Pluck a character out of a string and return the index of the next
970 /// This function can be used to iterate over the Unicode characters of a
975 /// This example manually iterates through the characters of a
976 /// string; this should normally be done by `.chars()` or
980 /// use std::str::CharRange;
982 /// let s = "中华Việt Nam";
984 /// while i < s.len() {
985 /// let CharRange {ch, next} = s.char_range_at(i);
986 /// println!("{}: {}", i, ch);
1008 /// * s - The string
1009 /// * i - The byte offset of the char to extract
1013 /// A record {ch: char, next: uint} containing the char value and the byte
1014 /// index of the next Unicode character.
1018 /// If `i` is greater than or equal to the length of the string.
1019 /// If `i` is not the index of the beginning of a valid UTF-8 character.
1020 #[unstable = "naming is uncertain with container conventions"]
1021 fn char_range_at(&self, start: uint) -> CharRange {
1022 core_str::StrExt::char_range_at(self[], start)
1025 /// Given a byte position and a str, return the previous char and its position.
1027 /// This function can be used to iterate over a Unicode string in reverse.
1029 /// Returns 0 for next index if called on start index 0.
1033 /// If `i` is greater than the length of the string.
1034 /// If `i` is not an index following a valid UTF-8 character.
1035 #[unstable = "naming is uncertain with container conventions"]
1036 fn char_range_at_reverse(&self, start: uint) -> CharRange {
1037 core_str::StrExt::char_range_at_reverse(self[], start)
1040 /// Plucks the character starting at the `i`th byte of a string.
1046 /// assert_eq!(s.char_at(1), 'b');
1047 /// assert_eq!(s.char_at(2), 'π');
1048 /// assert_eq!(s.char_at(4), 'c');
1053 /// If `i` is greater than or equal to the length of the string.
1054 /// If `i` is not the index of the beginning of a valid UTF-8 character.
1055 #[unstable = "naming is uncertain with container conventions"]
1056 fn char_at(&self, i: uint) -> char {
1057 core_str::StrExt::char_at(self[], i)
1060 /// Plucks the character ending at the `i`th byte of a string.
1064 /// If `i` is greater than the length of the string.
1065 /// If `i` is not an index following a valid UTF-8 character.
1066 #[unstable = "naming is uncertain with container conventions"]
1067 fn char_at_reverse(&self, i: uint) -> char {
1068 core_str::StrExt::char_at_reverse(self[], i)
1071 /// Work with the byte buffer of a string as a byte slice.
1076 /// assert_eq!("bors".as_bytes(), b"bors");
1079 fn as_bytes(&self) -> &[u8] {
1080 core_str::StrExt::as_bytes(self[])
1083 /// Returns the byte index of the first character of `self` that
1084 /// matches the pattern `pat`.
1088 /// `Some` containing the byte index of the last matching character
1089 /// or `None` if there is no match
1094 /// let s = "Löwe 老虎 Léopard";
1096 /// assert_eq!(s.find('L'), Some(0));
1097 /// assert_eq!(s.find('é'), Some(14));
1099 /// // the first space
1100 /// assert_eq!(s.find(|&: c: char| c.is_whitespace()), Some(5));
1102 /// // neither are found
1103 /// let x: &[_] = &['1', '2'];
1104 /// assert_eq!(s.find(x), None);
1107 fn find<P: CharEq>(&self, pat: P) -> Option<uint> {
1108 core_str::StrExt::find(self[], pat)
1111 /// Returns the byte index of the last character of `self` that
1112 /// matches the pattern `pat`.
1116 /// `Some` containing the byte index of the last matching character
1117 /// or `None` if there is no match.
1122 /// let s = "Löwe 老虎 Léopard";
1124 /// assert_eq!(s.rfind('L'), Some(13));
1125 /// assert_eq!(s.rfind('é'), Some(14));
1127 /// // the second space
1128 /// assert_eq!(s.rfind(|&: c: char| c.is_whitespace()), Some(12));
1130 /// // searches for an occurrence of either `1` or `2`, but neither are found
1131 /// let x: &[_] = &['1', '2'];
1132 /// assert_eq!(s.rfind(x), None);
1135 fn rfind<P: CharEq>(&self, pat: P) -> Option<uint> {
1136 core_str::StrExt::rfind(self[], pat)
1139 /// Returns the byte index of the first matching substring
1143 /// * `needle` - The string to search for
1147 /// `Some` containing the byte index of the first matching substring
1148 /// or `None` if there is no match.
1153 /// let s = "Löwe 老虎 Léopard";
1155 /// assert_eq!(s.find_str("老虎 L"), Some(6));
1156 /// assert_eq!(s.find_str("muffin man"), None);
1158 #[unstable = "might get removed in favor of a more generic find in the future"]
1159 fn find_str(&self, needle: &str) -> Option<uint> {
1160 core_str::StrExt::find_str(self[], needle)
1163 /// Retrieves the first character from a string slice and returns
1164 /// it. This does not allocate a new string; instead, it returns a
1165 /// slice that point one character beyond the character that was
1166 /// shifted. If the string does not contain any characters,
1167 /// None is returned instead.
1172 /// let s = "Löwe 老虎 Léopard";
1173 /// let (c, s1) = s.slice_shift_char().unwrap();
1174 /// assert_eq!(c, 'L');
1175 /// assert_eq!(s1, "öwe 老虎 Léopard");
1177 /// let (c, s2) = s1.slice_shift_char().unwrap();
1178 /// assert_eq!(c, 'ö');
1179 /// assert_eq!(s2, "we 老虎 Léopard");
1181 #[unstable = "awaiting conventions about shifting and slices"]
1182 fn slice_shift_char(&self) -> Option<(char, &str)> {
1183 core_str::StrExt::slice_shift_char(self[])
1186 /// Returns the byte offset of an inner slice relative to an enclosing outer slice.
1188 /// Panics if `inner` is not a direct slice contained within self.
1193 /// let string = "a\nb\nc";
1194 /// let lines: Vec<&str> = string.lines().collect();
1196 /// assert!(string.subslice_offset(lines[0]) == 0); // &"a"
1197 /// assert!(string.subslice_offset(lines[1]) == 2); // &"b"
1198 /// assert!(string.subslice_offset(lines[2]) == 4); // &"c"
1200 #[unstable = "awaiting convention about comparability of arbitrary slices"]
1201 fn subslice_offset(&self, inner: &str) -> uint {
1202 core_str::StrExt::subslice_offset(self[], inner)
1205 /// Return an unsafe pointer to the strings buffer.
1207 /// The caller must ensure that the string outlives this pointer,
1208 /// and that it is not reallocated (e.g. by pushing to the
1212 fn as_ptr(&self) -> *const u8 {
1213 core_str::StrExt::as_ptr(self[])
1216 /// Return an iterator of `u16` over the string encoded as UTF-16.
1217 #[unstable = "this functionality may only be provided by libunicode"]
1218 fn utf16_units(&self) -> Utf16Units {
1219 Utf16Units { encoder: Utf16Encoder::new(self[].chars()) }
1222 /// Return the number of bytes in this string
1227 /// assert_eq!("foo".len(), 3);
1228 /// assert_eq!("ƒoo".len(), 4);
1232 fn len(&self) -> uint {
1233 core_str::StrExt::len(self[])
1236 /// Returns true if this slice contains no bytes
1241 /// assert!("".is_empty());
1245 fn is_empty(&self) -> bool {
1246 core_str::StrExt::is_empty(self[])
1249 /// Parse this string into the specified type.
1254 /// assert_eq!("4".parse::<u32>(), Some(4));
1255 /// assert_eq!("j".parse::<u32>(), None);
1258 #[unstable = "this method was just created"]
1259 fn parse<F: FromStr>(&self) -> Option<F> {
1260 core_str::StrExt::parse(self[])
1263 /// Returns an iterator over the
1264 /// [grapheme clusters](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
1267 /// If `is_extended` is true, the iterator is over the *extended grapheme clusters*;
1268 /// otherwise, the iterator is over the *legacy grapheme clusters*.
1269 /// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
1270 /// recommends extended grapheme cluster boundaries for general processing.
1275 /// let gr1 = "a\u{310}e\u{301}o\u{308}\u{332}".graphemes(true).collect::<Vec<&str>>();
1276 /// let b: &[_] = &["a\u{310}", "e\u{301}", "o\u{308}\u{332}"];
1277 /// assert_eq!(gr1.as_slice(), b);
1278 /// let gr2 = "a\r\nb🇷🇺🇸🇹".graphemes(true).collect::<Vec<&str>>();
1279 /// let b: &[_] = &["a", "\r\n", "b", "🇷🇺🇸🇹"];
1280 /// assert_eq!(gr2.as_slice(), b);
1282 #[unstable = "this functionality may only be provided by libunicode"]
1283 fn graphemes(&self, is_extended: bool) -> Graphemes {
1284 UnicodeStr::graphemes(self[], is_extended)
1287 /// Returns an iterator over the grapheme clusters of self and their byte offsets.
1288 /// See `graphemes()` method for more information.
1293 /// let gr_inds = "a̐éö̲\r\n".grapheme_indices(true).collect::<Vec<(uint, &str)>>();
1294 /// let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
1295 /// assert_eq!(gr_inds.as_slice(), b);
1297 #[unstable = "this functionality may only be provided by libunicode"]
1298 fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices {
1299 UnicodeStr::grapheme_indices(self[], is_extended)
1302 /// An iterator over the words of a string (subsequences separated
1303 /// by any sequence of whitespace). Sequences of whitespace are
1304 /// collapsed, so empty "words" are not included.
1309 /// let some_words = " Mary had\ta little \n\t lamb";
1310 /// let v: Vec<&str> = some_words.words().collect();
1311 /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
1314 fn words(&self) -> Words {
1315 UnicodeStr::words(self[])
// NOTE(review): method fragment — the closing brace is elided in this excerpt.
1318 /// Returns a string's displayed width in columns, treating control
1319 /// characters as zero-width.
1321 /// `is_cjk` determines behavior for characters in the Ambiguous category:
1322 /// if `is_cjk` is `true`, these are 2 columns wide; otherwise, they are 1.
1323 /// In CJK locales, `is_cjk` should be `true`, else it should be `false`.
1324 /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
1325 /// recommends that these characters be treated as 1 column (i.e.,
1326 /// `is_cjk` = `false`) if the locale is unknown.
1327 #[unstable = "this functionality may only be provided by libunicode"]
1328 fn width(&self, is_cjk: bool) -> uint {
// Sum of per-character East Asian Widths, delegated to libunicode.
1329 UnicodeStr::width(self[], is_cjk)
// NOTE(review): three adjacent one-line delegating methods; each method's
// closing brace and the surrounding `#[stable]`-style attributes are elided
// in this excerpt. "Whitespace" is Unicode whitespace per libunicode.
1332 /// Returns a string with leading and trailing whitespace removed.
1334 fn trim(&self) -> &str {
1335 UnicodeStr::trim(self[])
1338 /// Returns a string with leading whitespace removed.
1340 fn trim_left(&self) -> &str {
1341 UnicodeStr::trim_left(self[])
1344 /// Returns a string with trailing whitespace removed.
1346 fn trim_right(&self) -> &str {
1347 UnicodeStr::trim_right(self[])
// Empty impl: every `StrExt` method has a default body in the trait
// definition above, so `str` picks them all up for free.
1352 impl StrExt for str {}
1358 use core::iter::AdditiveIterator;
1359 use super::from_utf8;
1360 use super::Utf8Error;
1365 assert!("" <= "foo");
1366 assert!("foo" <= "foo");
1367 assert!("foo" != "bar");
1372 assert_eq!("".len(), 0u);
1373 assert_eq!("hello world".len(), 11u);
1374 assert_eq!("\x63".len(), 1u);
1375 assert_eq!("\u{a2}".len(), 2u);
1376 assert_eq!("\u{3c0}".len(), 2u);
1377 assert_eq!("\u{2620}".len(), 3u);
1378 assert_eq!("\u{1d11e}".len(), 4u);
1380 assert_eq!("".chars().count(), 0u);
1381 assert_eq!("hello world".chars().count(), 11u);
1382 assert_eq!("\x63".chars().count(), 1u);
1383 assert_eq!("\u{a2}".chars().count(), 1u);
1384 assert_eq!("\u{3c0}".chars().count(), 1u);
1385 assert_eq!("\u{2620}".chars().count(), 1u);
1386 assert_eq!("\u{1d11e}".chars().count(), 1u);
1387 assert_eq!("ประเทศไทย中华Việt Nam".chars().count(), 19u);
1389 assert_eq!("hello".width(false), 10u);
1390 assert_eq!("hello".width(true), 10u);
1391 assert_eq!("\0\0\0\0\0".width(false), 0u);
1392 assert_eq!("\0\0\0\0\0".width(true), 0u);
1393 assert_eq!("".width(false), 0u);
1394 assert_eq!("".width(true), 0u);
1395 assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(false), 4u);
1396 assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(true), 8u);
1401 assert_eq!("hello".find('l'), Some(2u));
1402 assert_eq!("hello".find(|&: c:char| c == 'o'), Some(4u));
1403 assert!("hello".find('x').is_none());
1404 assert!("hello".find(|&: c:char| c == 'x').is_none());
1405 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
1406 assert_eq!("ประเทศไทย中华Việt Nam".find(|&: c: char| c == '华'), Some(30u));
1411 assert_eq!("hello".rfind('l'), Some(3u));
1412 assert_eq!("hello".rfind(|&: c:char| c == 'o'), Some(4u));
1413 assert!("hello".rfind('x').is_none());
1414 assert!("hello".rfind(|&: c:char| c == 'x').is_none());
1415 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
1416 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|&: c: char| c == '华'), Some(30u));
1421 let empty = String::from_str("");
1422 let s: String = empty.chars().collect();
1423 assert_eq!(empty, s);
1424 let data = String::from_str("ประเทศไทย中");
1425 let s: String = data.chars().collect();
1426 assert_eq!(data, s);
1430 fn test_into_bytes() {
1431 let data = String::from_str("asdf");
1432 let buf = data.into_bytes();
1433 assert_eq!(b"asdf", buf);
// NOTE(review): test fragment — the `#[test]` attribute, blank lines and the
// closing brace are elided in this excerpt. `find_str` returns the byte
// offset of the first occurrence of the needle, relative to the slice start.
1437 fn test_find_str() {
// Degenerate needles: empty pattern matches at 0; absent pattern is None.
1439 assert_eq!("".find_str(""), Some(0u));
1440 assert!("banana".find_str("apple pie").is_none());
1442 let data = "abcabc";
1443 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
// Offsets are relative to the sliced view, not the original string.
1444 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
1445 assert!(data.slice(2u, 4u).find_str("ab").is_none());
1447 let string = "ประเทศไทย中华Việt Nam";
1448 let mut data = String::from_str(string);
1449 data.push_str(string);
// Needle spanning a boundary that never occurs contiguously.
1450 assert!(data.find_str("ไท华").is_none());
1451 assert_eq!(data.slice(0u, 43u).find_str(""), Some(0u));
1452 assert_eq!(data.slice(6u, 43u).find_str(""), Some(6u - 6u));
// Multi-byte needles in the first copy of the string (byte offsets).
1454 assert_eq!(data.slice(0u, 43u).find_str("ประ"), Some( 0u));
1455 assert_eq!(data.slice(0u, 43u).find_str("ทศไ"), Some(12u));
1456 assert_eq!(data.slice(0u, 43u).find_str("ย中"), Some(24u));
1457 assert_eq!(data.slice(0u, 43u).find_str("iệt"), Some(34u));
1458 assert_eq!(data.slice(0u, 43u).find_str("Nam"), Some(40u));
// Same needles in the second copy; expected values written as
// absolute-offset minus slice-start for readability.
1460 assert_eq!(data.slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
1461 assert_eq!(data.slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
1462 assert_eq!(data.slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
1463 assert_eq!(data.slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
1464 assert_eq!(data.slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
1468 fn test_slice_chars() {
1469 fn t(a: &str, b: &str, start: uint) {
1470 assert_eq!(a.slice_chars(start, start + b.chars().count()), b);
1473 t("hello", "llo", 2);
1474 t("hello", "el", 1);
1477 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
1480 fn s(x: &str) -> String { x.to_string() }
1482 macro_rules! test_concat {
1483 ($expected: expr, $string: expr) => {
1485 let s: String = $string.concat();
1486 assert_eq!($expected, s);
1492 fn test_concat_for_different_types() {
1493 test_concat!("ab", vec![s("a"), s("b")]);
1494 test_concat!("ab", vec!["a", "b"]);
1495 test_concat!("ab", vec!["a", "b"].as_slice());
1496 test_concat!("ab", vec![s("a"), s("b")]);
1500 fn test_concat_for_different_lengths() {
1501 let empty: &[&str] = &[];
1502 test_concat!("", empty);
1503 test_concat!("a", ["a"]);
1504 test_concat!("ab", ["a", "b"]);
1505 test_concat!("abc", ["", "a", "bc"]);
1508 macro_rules! test_connect {
1509 ($expected: expr, $string: expr, $delim: expr) => {
1511 let s = $string.connect($delim);
1512 assert_eq!($expected, s);
1518 fn test_connect_for_different_types() {
1519 test_connect!("a-b", ["a", "b"], "-");
1520 let hyphen = "-".to_string();
1521 test_connect!("a-b", [s("a"), s("b")], hyphen.as_slice());
1522 test_connect!("a-b", vec!["a", "b"], hyphen.as_slice());
1523 test_connect!("a-b", vec!["a", "b"].as_slice(), "-");
1524 test_connect!("a-b", vec![s("a"), s("b")], "-");
1528 fn test_connect_for_different_lengths() {
1529 let empty: &[&str] = &[];
1530 test_connect!("", empty, "-");
1531 test_connect!("a", ["a"], "-");
1532 test_connect!("a-b", ["a", "b"], "-");
1533 test_connect!("-a-bc", ["", "a", "bc"], "-");
1537 fn test_unsafe_slice() {
1538 assert_eq!("ab", unsafe {"abc".slice_unchecked(0, 2)});
1539 assert_eq!("bc", unsafe {"abc".slice_unchecked(1, 3)});
1540 assert_eq!("", unsafe {"abc".slice_unchecked(1, 1)});
1541 fn a_million_letter_a() -> String {
1543 let mut rs = String::new();
1545 rs.push_str("aaaaaaaaaa");
1550 fn half_a_million_letter_a() -> String {
1552 let mut rs = String::new();
1554 rs.push_str("aaaaa");
1559 let letters = a_million_letter_a();
1560 assert!(half_a_million_letter_a() ==
1561 unsafe {String::from_str(letters.slice_unchecked(
// NOTE(review): test fragment — `#[test]` attribute and closing brace elided.
// Covers: empty-on-empty, empty prefix, proper prefix, needle longer than
// haystack, and multi-byte (non-ASCII) prefixes.
1567 fn test_starts_with() {
1568 assert!(("".starts_with("")));
1569 assert!(("abc".starts_with("")));
1570 assert!(("abc".starts_with("a")));
1571 assert!((!"a".starts_with("abc")));
1572 assert!((!"".starts_with("abc")));
1573 assert!((!"ödd".starts_with("-")));
1574 assert!(("ödd".starts_with("öd")));
// NOTE(review): test fragment — `#[test]` attribute and closing brace elided.
// Mirror image of test_starts_with, exercising suffix matching including
// multi-byte suffixes.
1578 fn test_ends_with() {
1579 assert!(("".ends_with("")));
1580 assert!(("abc".ends_with("")));
1581 assert!(("abc".ends_with("c")));
1582 assert!((!"a".ends_with("abc")));
1583 assert!((!"".ends_with("abc")));
1584 assert!((!"ddö".ends_with("-")));
1585 assert!(("ddö".ends_with("dö")));
1589 fn test_is_empty() {
1590 assert!("".is_empty());
1591 assert!(!"a".is_empty());
1597 assert_eq!("".replace(a, "b"), String::from_str(""));
1598 assert_eq!("a".replace(a, "b"), String::from_str("b"));
1599 assert_eq!("ab".replace(a, "b"), String::from_str("bb"));
1601 assert!(" test test ".replace(test, "toast") ==
1602 String::from_str(" toast toast "));
1603 assert_eq!(" test test ".replace(test, ""), String::from_str(" "));
1607 fn test_replace_2a() {
1608 let data = "ประเทศไทย中华";
1609 let repl = "دولة الكويت";
1612 let a2 = "دولة الكويتทศไทย中华";
1613 assert_eq!(data.replace(a, repl), a2);
1617 fn test_replace_2b() {
1618 let data = "ประเทศไทย中华";
1619 let repl = "دولة الكويت";
1622 let b2 = "ปรدولة الكويتทศไทย中华";
1623 assert_eq!(data.replace(b, repl), b2);
1627 fn test_replace_2c() {
1628 let data = "ประเทศไทย中华";
1629 let repl = "دولة الكويت";
1632 let c2 = "ประเทศไทยدولة الكويت";
1633 assert_eq!(data.replace(c, repl), c2);
1637 fn test_replace_2d() {
1638 let data = "ประเทศไทย中华";
1639 let repl = "دولة الكويت";
1642 assert_eq!(data.replace(d, repl), data);
1647 assert_eq!("ab", "abc".slice(0, 2));
1648 assert_eq!("bc", "abc".slice(1, 3));
1649 assert_eq!("", "abc".slice(1, 1));
1650 assert_eq!("\u{65e5}", "\u{65e5}\u{672c}".slice(0, 3));
1652 let data = "ประเทศไทย中华";
1653 assert_eq!("ป", data.slice(0, 3));
1654 assert_eq!("ร", data.slice(3, 6));
1655 assert_eq!("", data.slice(3, 3));
1656 assert_eq!("华", data.slice(30, 33));
1658 fn a_million_letter_x() -> String {
1660 let mut rs = String::new();
1662 rs.push_str("华华华华华华华华华华");
1667 fn half_a_million_letter_x() -> String {
1669 let mut rs = String::new();
1671 rs.push_str("华华华华华");
1676 let letters = a_million_letter_x();
1677 assert!(half_a_million_letter_x() ==
1678 String::from_str(letters.slice(0u, 3u * 500000u)));
1683 let ss = "中华Việt Nam";
1685 assert_eq!("华", ss.slice(3u, 6u));
1686 assert_eq!("Việt Nam", ss.slice(6u, 16u));
1688 assert_eq!("ab", "abc".slice(0u, 2u));
1689 assert_eq!("bc", "abc".slice(1u, 3u));
1690 assert_eq!("", "abc".slice(1u, 1u));
1692 assert_eq!("中", ss.slice(0u, 3u));
1693 assert_eq!("华V", ss.slice(3u, 7u));
1694 assert_eq!("", ss.slice(3u, 3u));
1709 fn test_slice_fail() {
1710 "中华Việt Nam".slice(0u, 2u);
1714 fn test_slice_from() {
1715 assert_eq!("abcd".slice_from(0), "abcd");
1716 assert_eq!("abcd".slice_from(2), "cd");
1717 assert_eq!("abcd".slice_from(4), "");
1720 fn test_slice_to() {
1721 assert_eq!("abcd".slice_to(0), "");
1722 assert_eq!("abcd".slice_to(2), "ab");
1723 assert_eq!("abcd".slice_to(4), "abcd");
// NOTE(review): test fragment — `#[test]` attribute and closing brace elided.
// Exercises the three pattern forms: empty char slice (no-op), char slice,
// single char, and a closure predicate.
1727 fn test_trim_left_matches() {
1728 let v: &[char] = &[];
1729 assert_eq!(" *** foo *** ".trim_left_matches(v), " *** foo *** ");
1730 let chars: &[char] = &['*', ' '];
1731 assert_eq!(" *** foo *** ".trim_left_matches(chars), "foo *** ");
// String consisting entirely of trimmable chars collapses to "".
1732 assert_eq!(" *** *** ".trim_left_matches(chars), "");
1733 assert_eq!("foo *** ".trim_left_matches(chars), "foo *** ");
1735 assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
1736 let chars: &[char] = &['1', '2'];
1737 assert_eq!("12foo1bar12".trim_left_matches(chars), "foo1bar12");
1738 assert_eq!("123foo1bar123".trim_left_matches(|&: c: char| c.is_numeric()), "foo1bar123");
// NOTE(review): test fragment — `#[test]` attribute and closing brace elided.
// Right-side counterpart of test_trim_left_matches; same pattern forms.
1742 fn test_trim_right_matches() {
1743 let v: &[char] = &[];
1744 assert_eq!(" *** foo *** ".trim_right_matches(v), " *** foo *** ");
1745 let chars: &[char] = &['*', ' '];
1746 assert_eq!(" *** foo *** ".trim_right_matches(chars), " *** foo");
1747 assert_eq!(" *** *** ".trim_right_matches(chars), "");
1748 assert_eq!(" *** foo".trim_right_matches(chars), " *** foo");
1750 assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
1751 let chars: &[char] = &['1', '2'];
1752 assert_eq!("12foo1bar12".trim_right_matches(chars), "12foo1bar");
1753 assert_eq!("123foo1bar123".trim_right_matches(|&: c: char| c.is_numeric()), "123foo1bar");
// NOTE(review): test fragment — `#[test]` attribute and closing brace elided.
// Both-sides trim; same pattern forms as the left/right variants above.
1757 fn test_trim_matches() {
1758 let v: &[char] = &[];
1759 assert_eq!(" *** foo *** ".trim_matches(v), " *** foo *** ");
1760 let chars: &[char] = &['*', ' '];
1761 assert_eq!(" *** foo *** ".trim_matches(chars), "foo");
1762 assert_eq!(" *** *** ".trim_matches(chars), "");
1763 assert_eq!("foo".trim_matches(chars), "foo");
1765 assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
1766 let chars: &[char] = &['1', '2'];
1767 assert_eq!("12foo1bar12".trim_matches(chars), "foo1bar");
1768 assert_eq!("123foo1bar123".trim_matches(|&: c: char| c.is_numeric()), "foo1bar")
// NOTE(review): test fragment — `#[test]` attribute and closing brace elided.
// \u{3000} (ideographic space) checks that non-ASCII whitespace is trimmed.
1772 fn test_trim_left() {
1773 assert_eq!("".trim_left(), "");
1774 assert_eq!("a".trim_left(), "a");
1775 assert_eq!(" ".trim_left(), "");
1776 assert_eq!(" blah".trim_left(), "blah");
1777 assert_eq!(" \u{3000} wut".trim_left(), "wut");
// Trailing whitespace must be preserved by a left-only trim.
1778 assert_eq!("hey ".trim_left(), "hey ");
// NOTE(review): test fragment — `#[test]` attribute and closing brace elided.
// Mirror of test_trim_left; leading whitespace must survive a right trim.
1782 fn test_trim_right() {
1783 assert_eq!("".trim_right(), "");
1784 assert_eq!("a".trim_right(), "a");
1785 assert_eq!(" ".trim_right(), "");
1786 assert_eq!("blah ".trim_right(), "blah");
1787 assert_eq!("wut \u{3000} ".trim_right(), "wut");
1788 assert_eq!(" hey".trim_right(), " hey");
1793 assert_eq!("".trim(), "");
1794 assert_eq!("a".trim(), "a");
1795 assert_eq!(" ".trim(), "");
1796 assert_eq!(" blah ".trim(), "blah");
1797 assert_eq!("\nwut \u{3000} ".trim(), "wut");
1798 assert_eq!(" hey dude ".trim(), "hey dude");
1802 fn test_is_whitespace() {
1803 assert!("".chars().all(|c| c.is_whitespace()));
1804 assert!(" ".chars().all(|c| c.is_whitespace()));
1805 assert!("\u{2009}".chars().all(|c| c.is_whitespace())); // Thin space
1806 assert!(" \n\t ".chars().all(|c| c.is_whitespace()));
1807 assert!(!" _ ".chars().all(|c| c.is_whitespace()));
1811 fn test_slice_shift_char() {
1812 let data = "ประเทศไทย中";
1813 assert_eq!(data.slice_shift_char(), Some(('ป', "ระเทศไทย中")));
1817 fn test_slice_shift_char_2() {
1819 assert_eq!(empty.slice_shift_char(), None);
// NOTE(review): interior of a UTF-8 validation test whose fn header,
// `#[test]` attribute and closing brace are elided in this excerpt.
1824 // deny overlong encodings
1825 assert!(from_utf8(&[0xc0, 0x80]).is_err());
1826 assert!(from_utf8(&[0xc0, 0xae]).is_err());
1827 assert!(from_utf8(&[0xe0, 0x80, 0x80]).is_err());
1828 assert!(from_utf8(&[0xe0, 0x80, 0xaf]).is_err());
1829 assert!(from_utf8(&[0xe0, 0x81, 0x81]).is_err());
1830 assert!(from_utf8(&[0xf0, 0x82, 0x82, 0xac]).is_err());
// 0xF4 0x90... encodes U+110000, one past the last valid scalar U+10FFFF.
1831 assert!(from_utf8(&[0xf4, 0x90, 0x80, 0x80]).is_err());
// Surrogate code points (U+D800..U+DFFF) are not valid UTF-8 scalars.
1834 assert!(from_utf8(&[0xED, 0xA0, 0x80]).is_err());
1835 assert!(from_utf8(&[0xED, 0xBF, 0xBF]).is_err());
// Boundary cases that ARE valid: minimal/maximal bytes for each
// sequence length, plus the scalars adjacent to the surrogate gap.
1837 assert!(from_utf8(&[0xC2, 0x80]).is_ok());
1838 assert!(from_utf8(&[0xDF, 0xBF]).is_ok());
1839 assert!(from_utf8(&[0xE0, 0xA0, 0x80]).is_ok());
1840 assert!(from_utf8(&[0xED, 0x9F, 0xBF]).is_ok());
1841 assert!(from_utf8(&[0xEE, 0x80, 0x80]).is_ok());
1842 assert!(from_utf8(&[0xEF, 0xBF, 0xBF]).is_ok());
1843 assert!(from_utf8(&[0xF0, 0x90, 0x80, 0x80]).is_ok());
1844 assert!(from_utf8(&[0xF4, 0x8F, 0xBF, 0xBF]).is_ok());
1848 fn test_is_utf16() {
1849 use unicode::str::is_utf16;
1850 macro_rules! pos ( ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } });
1858 // surrogate pairs (randomly generated with Python 3's
1859 // .encode('utf-16be'))
1860 pos!(&[0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
1861 &[0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
1862 &[0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);
1864 // mixtures (also random)
1865 pos!(&[0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
1866 &[0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
1867 &[0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);
1870 macro_rules! neg ( ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } });
1873 // surrogate + regular unit
1875 // surrogate + lead surrogate
1877 // unterminated surrogate
1879 // trail surrogate without a lead
1882 // random byte sequences that Python 3's .decode('utf-16be')
1884 neg!(&[0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
1885 &[0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
1886 &[0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
1887 &[0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
1888 &[0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
1889 &[0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
1890 &[0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
1891 &[0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
1892 &[0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
1893 &[0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
1894 &[0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
1895 &[0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
1896 &[0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
1897 &[0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
1898 &[0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
1899 &[0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
1900 &[0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
1901 &[0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
1902 &[0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
1903 &[0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
1904 &[0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
1908 fn test_as_bytes() {
1911 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1912 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1916 assert_eq!("".as_bytes(), b);
1917 assert_eq!("abc".as_bytes(), b"abc");
1918 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
1923 fn test_as_bytes_fail() {
1924 // Don't double free. (I'm not sure if this exercises the
1925 // original problem code path anymore.)
1926 let s = String::from_str("");
1927 let _bytes = s.as_bytes();
1933 let buf = "hello".as_ptr();
1935 assert_eq!(*buf.offset(0), b'h');
1936 assert_eq!(*buf.offset(1), b'e');
1937 assert_eq!(*buf.offset(2), b'l');
1938 assert_eq!(*buf.offset(3), b'l');
1939 assert_eq!(*buf.offset(4), b'o');
1944 fn test_subslice_offset() {
1945 let a = "kernelsprite";
1946 let b = a.slice(7, a.len());
1947 let c = a.slice(0, a.len() - 6);
1948 assert_eq!(a.subslice_offset(b), 7);
1949 assert_eq!(a.subslice_offset(c), 0);
1951 let string = "a\nb\nc";
1952 let lines: Vec<&str> = string.lines().collect();
1953 assert_eq!(string.subslice_offset(lines[0]), 0);
1954 assert_eq!(string.subslice_offset(lines[1]), 2);
1955 assert_eq!(string.subslice_offset(lines[2]), 4);
1960 fn test_subslice_offset_2() {
1961 let a = "alchemiter";
1962 let b = "cruxtruder";
1963 a.subslice_offset(b);
1967 fn vec_str_conversions() {
1968 let s1: String = String::from_str("All mimsy were the borogoves");
1970 let v: Vec<u8> = s1.as_bytes().to_vec();
1971 let s2: String = String::from_str(from_utf8(v.as_slice()).unwrap());
1972 let mut i: uint = 0u;
1973 let n1: uint = s1.len();
1974 let n2: uint = v.len();
1977 let a: u8 = s1.as_bytes()[i];
1978 let b: u8 = s2.as_bytes()[i];
// NOTE(review): test fragment — `#[test]` attribute and closing brace elided.
// Substring containment: prefixes, suffixes, empty needles, and multi-byte
// needles; also a needle that straddles a boundary never present contiguously.
1987 fn test_contains() {
1988 assert!("abcde".contains("bcd"));
1989 assert!("abcde".contains("abcd"));
1990 assert!("abcde".contains("bcde"));
1991 assert!("abcde".contains(""));
1992 assert!("".contains(""));
1993 assert!(!"abcde".contains("def"));
1994 assert!(!"".contains("a"));
1996 let data = "ประเทศไทย中华Việt Nam";
1997 assert!(data.contains("ประเ"));
1998 assert!(data.contains("ะเ"));
1999 assert!(data.contains("中华"));
2000 assert!(!data.contains("ไท华"));
2004 fn test_contains_char() {
2005 assert!("abc".contains_char('b'));
2006 assert!("a".contains_char('a'));
2007 assert!(!"abc".contains_char('d'));
2008 assert!(!"".contains_char('a'));
2013 let s = "ศไทย中华Việt Nam";
2014 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2016 for ch in v.iter() {
2017 assert!(s.char_at(pos) == *ch);
2018 pos += ch.to_string().len();
2023 fn test_char_at_reverse() {
2024 let s = "ศไทย中华Việt Nam";
2025 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2026 let mut pos = s.len();
2027 for ch in v.iter().rev() {
2028 assert!(s.char_at_reverse(pos) == *ch);
2029 pos -= ch.to_string().len();
2034 fn test_escape_unicode() {
2035 assert_eq!("abc".escape_unicode(),
2036 String::from_str("\\u{61}\\u{62}\\u{63}"));
2037 assert_eq!("a c".escape_unicode(),
2038 String::from_str("\\u{61}\\u{20}\\u{63}"));
2039 assert_eq!("\r\n\t".escape_unicode(),
2040 String::from_str("\\u{d}\\u{a}\\u{9}"));
2041 assert_eq!("'\"\\".escape_unicode(),
2042 String::from_str("\\u{27}\\u{22}\\u{5c}"));
2043 assert_eq!("\x00\x01\u{fe}\u{ff}".escape_unicode(),
2044 String::from_str("\\u{0}\\u{1}\\u{fe}\\u{ff}"));
2045 assert_eq!("\u{100}\u{ffff}".escape_unicode(),
2046 String::from_str("\\u{100}\\u{ffff}"));
2047 assert_eq!("\u{10000}\u{10ffff}".escape_unicode(),
2048 String::from_str("\\u{10000}\\u{10ffff}"));
2049 assert_eq!("ab\u{fb00}".escape_unicode(),
2050 String::from_str("\\u{61}\\u{62}\\u{fb00}"));
2051 assert_eq!("\u{1d4ea}\r".escape_unicode(),
2052 String::from_str("\\u{1d4ea}\\u{d}"));
2056 fn test_escape_default() {
2057 assert_eq!("abc".escape_default(), String::from_str("abc"));
2058 assert_eq!("a c".escape_default(), String::from_str("a c"));
2059 assert_eq!("\r\n\t".escape_default(), String::from_str("\\r\\n\\t"));
2060 assert_eq!("'\"\\".escape_default(), String::from_str("\\'\\\"\\\\"));
2061 assert_eq!("\u{100}\u{ffff}".escape_default(),
2062 String::from_str("\\u{100}\\u{ffff}"));
2063 assert_eq!("\u{10000}\u{10ffff}".escape_default(),
2064 String::from_str("\\u{10000}\\u{10ffff}"));
2065 assert_eq!("ab\u{fb00}".escape_default(),
2066 String::from_str("ab\\u{fb00}"));
2067 assert_eq!("\u{1d4ea}\r".escape_default(),
2068 String::from_str("\\u{1d4ea}\\r"));
// NOTE(review): test fragment — `#[test]` attribute and closing brace elided.
// DEFECT: each `cmp(...) == Ordering` comparison is an expression statement
// whose result is discarded — there is no `assert!`, so this test can never
// fail. Each line should be wrapped as `assert!("1234".cmp("123") == Greater)`
// (or assert_eq! against the Ordering variant). Not fixable in this excerpt
// because the surrounding attribute/brace lines are missing.
2072 fn test_total_ord() {
2073 "1234".cmp("123") == Greater;
2074 "123".cmp("1234") == Less;
2075 "1234".cmp("1234") == Equal;
2076 "12345555".cmp("123456") == Less;
2077 "22".cmp("1234") == Greater;
2081 fn test_char_range_at() {
2082 let data = "b¢€𤭢𤭢€¢b";
2083 assert_eq!('b', data.char_range_at(0).ch);
2084 assert_eq!('¢', data.char_range_at(1).ch);
2085 assert_eq!('€', data.char_range_at(3).ch);
2086 assert_eq!('𤭢', data.char_range_at(6).ch);
2087 assert_eq!('𤭢', data.char_range_at(10).ch);
2088 assert_eq!('€', data.char_range_at(14).ch);
2089 assert_eq!('¢', data.char_range_at(17).ch);
2090 assert_eq!('b', data.char_range_at(19).ch);
2094 fn test_char_range_at_reverse_underflow() {
2095 assert_eq!("abc".char_range_at_reverse(0).next, 0);
2099 fn test_iterator() {
2100 let s = "ศไทย中华Việt Nam";
2101 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2104 let mut it = s.chars();
2107 assert_eq!(c, v[pos]);
2110 assert_eq!(pos, v.len());
2114 fn test_rev_iterator() {
2115 let s = "ศไทย中华Việt Nam";
2116 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2119 let mut it = s.chars().rev();
2122 assert_eq!(c, v[pos]);
2125 assert_eq!(pos, v.len());
2129 fn test_chars_decoding() {
2130 let mut bytes = [0u8; 4];
2131 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2132 let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2133 let s = ::core::str::from_utf8(bytes[..len]).unwrap();
2134 if Some(c) != s.chars().next() {
2135 panic!("character {:x}={} does not decode correctly", c as u32, c);
2141 fn test_chars_rev_decoding() {
2142 let mut bytes = [0u8; 4];
2143 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2144 let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2145 let s = ::core::str::from_utf8(bytes[..len]).unwrap();
2146 if Some(c) != s.chars().rev().next() {
2147 panic!("character {:x}={} does not decode correctly", c as u32, c);
2153 fn test_iterator_clone() {
2154 let s = "ศไทย中华Việt Nam";
2155 let mut it = s.chars();
2157 assert!(it.zip(it.clone()).all(|(x,y)| x == y));
2161 fn test_bytesator() {
2162 let s = "ศไทย中华Việt Nam";
2164 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2165 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2170 for b in s.bytes() {
2171 assert_eq!(b, v[pos]);
2177 fn test_bytes_revator() {
2178 let s = "ศไทย中华Việt Nam";
2180 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2181 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2184 let mut pos = v.len();
2186 for b in s.bytes().rev() {
2188 assert_eq!(b, v[pos]);
2193 fn test_char_indicesator() {
2194 let s = "ศไทย中华Việt Nam";
2195 let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
2196 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2199 let mut it = s.char_indices();
2202 assert_eq!(c, (p[pos], v[pos]));
2205 assert_eq!(pos, v.len());
2206 assert_eq!(pos, p.len());
2210 fn test_char_indices_revator() {
2211 let s = "ศไทย中华Việt Nam";
2212 let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
2213 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2216 let mut it = s.char_indices().rev();
2219 assert_eq!(c, (p[pos], v[pos]));
2222 assert_eq!(pos, v.len());
2223 assert_eq!(pos, p.len());
2227 fn test_splitn_char_iterator() {
2228 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2230 let split: Vec<&str> = data.splitn(3, ' ').collect();
2231 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2233 let split: Vec<&str> = data.splitn(3, |&: c: char| c == ' ').collect();
2234 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2237 let split: Vec<&str> = data.splitn(3, 'ä').collect();
2238 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
2240 let split: Vec<&str> = data.splitn(3, |&: c: char| c == 'ä').collect();
2241 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
2245 fn test_split_char_iterator_no_trailing() {
2246 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2248 let split: Vec<&str> = data.split('\n').collect();
2249 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
2251 let split: Vec<&str> = data.split_terminator('\n').collect();
2252 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
2257 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
2258 let words: Vec<&str> = data.words().collect();
2259 assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
2263 fn test_nfd_chars() {
2265 ($input: expr, $expected: expr) => {
2266 assert_eq!($input.nfd_chars().collect::<String>(), $expected);
2270 t!("\u{1e0b}\u{1c4}", "d\u{307}\u{1c4}");
2271 t!("\u{2026}", "\u{2026}");
2272 t!("\u{2126}", "\u{3a9}");
2273 t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
2274 t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
2275 t!("a\u{301}", "a\u{301}");
2276 t!("\u{301}a", "\u{301}a");
2277 t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
2278 t!("\u{ac1c}", "\u{1100}\u{1162}");
2282 fn test_nfkd_chars() {
2284 ($input: expr, $expected: expr) => {
2285 assert_eq!($input.nfkd_chars().collect::<String>(), $expected);
2289 t!("\u{1e0b}\u{1c4}", "d\u{307}DZ\u{30c}");
2290 t!("\u{2026}", "...");
2291 t!("\u{2126}", "\u{3a9}");
2292 t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
2293 t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
2294 t!("a\u{301}", "a\u{301}");
2295 t!("\u{301}a", "\u{301}a");
2296 t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
2297 t!("\u{ac1c}", "\u{1100}\u{1162}");
2301 fn test_nfc_chars() {
2303 ($input: expr, $expected: expr) => {
2304 assert_eq!($input.nfc_chars().collect::<String>(), $expected);
2308 t!("\u{1e0b}\u{1c4}", "\u{1e0b}\u{1c4}");
2309 t!("\u{2026}", "\u{2026}");
2310 t!("\u{2126}", "\u{3a9}");
2311 t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
2312 t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
2313 t!("a\u{301}", "\u{e1}");
2314 t!("\u{301}a", "\u{301}a");
2315 t!("\u{d4db}", "\u{d4db}");
2316 t!("\u{ac1c}", "\u{ac1c}");
2317 t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
2321 fn test_nfkc_chars() {
2323 ($input: expr, $expected: expr) => {
2324 assert_eq!($input.nfkc_chars().collect::<String>(), $expected);
2328 t!("\u{1e0b}\u{1c4}", "\u{1e0b}D\u{17d}");
2329 t!("\u{2026}", "...");
2330 t!("\u{2126}", "\u{3a9}");
2331 t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
2332 t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
2333 t!("a\u{301}", "\u{e1}");
2334 t!("\u{301}a", "\u{301}a");
2335 t!("\u{d4db}", "\u{d4db}");
2336 t!("\u{ac1c}", "\u{ac1c}");
2337 t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
2342 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
2343 let lines: Vec<&str> = data.lines().collect();
2344 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2346 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
2347 let lines: Vec<&str> = data.lines().collect();
2348 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2352 fn test_graphemes() {
2353 use core::iter::order;
2354 // official Unicode test data
2355 // from http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
2356 let test_same: [(_, &[_]); 325] = [
2357 ("\u{20}\u{20}", &["\u{20}", "\u{20}"]),
2358 ("\u{20}\u{308}\u{20}", &["\u{20}\u{308}", "\u{20}"]),
2359 ("\u{20}\u{D}", &["\u{20}", "\u{D}"]),
2360 ("\u{20}\u{308}\u{D}", &["\u{20}\u{308}", "\u{D}"]),
2361 ("\u{20}\u{A}", &["\u{20}", "\u{A}"]),
2362 ("\u{20}\u{308}\u{A}", &["\u{20}\u{308}", "\u{A}"]),
2363 ("\u{20}\u{1}", &["\u{20}", "\u{1}"]),
2364 ("\u{20}\u{308}\u{1}", &["\u{20}\u{308}", "\u{1}"]),
2365 ("\u{20}\u{300}", &["\u{20}\u{300}"]),
2366 ("\u{20}\u{308}\u{300}", &["\u{20}\u{308}\u{300}"]),
2367 ("\u{20}\u{1100}", &["\u{20}", "\u{1100}"]),
2368 ("\u{20}\u{308}\u{1100}", &["\u{20}\u{308}", "\u{1100}"]),
2369 ("\u{20}\u{1160}", &["\u{20}", "\u{1160}"]),
2370 ("\u{20}\u{308}\u{1160}", &["\u{20}\u{308}", "\u{1160}"]),
2371 ("\u{20}\u{11A8}", &["\u{20}", "\u{11A8}"]),
2372 ("\u{20}\u{308}\u{11A8}", &["\u{20}\u{308}", "\u{11A8}"]),
2373 ("\u{20}\u{AC00}", &["\u{20}", "\u{AC00}"]),
2374 ("\u{20}\u{308}\u{AC00}", &["\u{20}\u{308}", "\u{AC00}"]),
2375 ("\u{20}\u{AC01}", &["\u{20}", "\u{AC01}"]),
2376 ("\u{20}\u{308}\u{AC01}", &["\u{20}\u{308}", "\u{AC01}"]),
2377 ("\u{20}\u{1F1E6}", &["\u{20}", "\u{1F1E6}"]),
2378 ("\u{20}\u{308}\u{1F1E6}", &["\u{20}\u{308}", "\u{1F1E6}"]),
2379 ("\u{20}\u{378}", &["\u{20}", "\u{378}"]),
2380 ("\u{20}\u{308}\u{378}", &["\u{20}\u{308}", "\u{378}"]),
2381 ("\u{D}\u{20}", &["\u{D}", "\u{20}"]),
2382 ("\u{D}\u{308}\u{20}", &["\u{D}", "\u{308}", "\u{20}"]),
2383 ("\u{D}\u{D}", &["\u{D}", "\u{D}"]),
2384 ("\u{D}\u{308}\u{D}", &["\u{D}", "\u{308}", "\u{D}"]),
2385 ("\u{D}\u{A}", &["\u{D}\u{A}"]),
2386 ("\u{D}\u{308}\u{A}", &["\u{D}", "\u{308}", "\u{A}"]),
2387 ("\u{D}\u{1}", &["\u{D}", "\u{1}"]),
2388 ("\u{D}\u{308}\u{1}", &["\u{D}", "\u{308}", "\u{1}"]),
2389 ("\u{D}\u{300}", &["\u{D}", "\u{300}"]),
2390 ("\u{D}\u{308}\u{300}", &["\u{D}", "\u{308}\u{300}"]),
2391 ("\u{D}\u{903}", &["\u{D}", "\u{903}"]),
2392 ("\u{D}\u{1100}", &["\u{D}", "\u{1100}"]),
2393 ("\u{D}\u{308}\u{1100}", &["\u{D}", "\u{308}", "\u{1100}"]),
2394 ("\u{D}\u{1160}", &["\u{D}", "\u{1160}"]),
2395 ("\u{D}\u{308}\u{1160}", &["\u{D}", "\u{308}", "\u{1160}"]),
2396 ("\u{D}\u{11A8}", &["\u{D}", "\u{11A8}"]),
2397 ("\u{D}\u{308}\u{11A8}", &["\u{D}", "\u{308}", "\u{11A8}"]),
2398 ("\u{D}\u{AC00}", &["\u{D}", "\u{AC00}"]),
2399 ("\u{D}\u{308}\u{AC00}", &["\u{D}", "\u{308}", "\u{AC00}"]),
2400 ("\u{D}\u{AC01}", &["\u{D}", "\u{AC01}"]),
2401 ("\u{D}\u{308}\u{AC01}", &["\u{D}", "\u{308}", "\u{AC01}"]),
2402 ("\u{D}\u{1F1E6}", &["\u{D}", "\u{1F1E6}"]),
2403 ("\u{D}\u{308}\u{1F1E6}", &["\u{D}", "\u{308}", "\u{1F1E6}"]),
2404 ("\u{D}\u{378}", &["\u{D}", "\u{378}"]),
2405 ("\u{D}\u{308}\u{378}", &["\u{D}", "\u{308}", "\u{378}"]),
2406 ("\u{A}\u{20}", &["\u{A}", "\u{20}"]),
2407 ("\u{A}\u{308}\u{20}", &["\u{A}", "\u{308}", "\u{20}"]),
2408 ("\u{A}\u{D}", &["\u{A}", "\u{D}"]),
2409 ("\u{A}\u{308}\u{D}", &["\u{A}", "\u{308}", "\u{D}"]),
2410 ("\u{A}\u{A}", &["\u{A}", "\u{A}"]),
2411 ("\u{A}\u{308}\u{A}", &["\u{A}", "\u{308}", "\u{A}"]),
2412 ("\u{A}\u{1}", &["\u{A}", "\u{1}"]),
2413 ("\u{A}\u{308}\u{1}", &["\u{A}", "\u{308}", "\u{1}"]),
2414 ("\u{A}\u{300}", &["\u{A}", "\u{300}"]),
2415 ("\u{A}\u{308}\u{300}", &["\u{A}", "\u{308}\u{300}"]),
2416 ("\u{A}\u{903}", &["\u{A}", "\u{903}"]),
2417 ("\u{A}\u{1100}", &["\u{A}", "\u{1100}"]),
2418 ("\u{A}\u{308}\u{1100}", &["\u{A}", "\u{308}", "\u{1100}"]),
2419 ("\u{A}\u{1160}", &["\u{A}", "\u{1160}"]),
2420 ("\u{A}\u{308}\u{1160}", &["\u{A}", "\u{308}", "\u{1160}"]),
2421 ("\u{A}\u{11A8}", &["\u{A}", "\u{11A8}"]),
2422 ("\u{A}\u{308}\u{11A8}", &["\u{A}", "\u{308}", "\u{11A8}"]),
2423 ("\u{A}\u{AC00}", &["\u{A}", "\u{AC00}"]),
2424 ("\u{A}\u{308}\u{AC00}", &["\u{A}", "\u{308}", "\u{AC00}"]),
2425 ("\u{A}\u{AC01}", &["\u{A}", "\u{AC01}"]),
2426 ("\u{A}\u{308}\u{AC01}", &["\u{A}", "\u{308}", "\u{AC01}"]),
2427 ("\u{A}\u{1F1E6}", &["\u{A}", "\u{1F1E6}"]),
2428 ("\u{A}\u{308}\u{1F1E6}", &["\u{A}", "\u{308}", "\u{1F1E6}"]),
2429 ("\u{A}\u{378}", &["\u{A}", "\u{378}"]),
2430 ("\u{A}\u{308}\u{378}", &["\u{A}", "\u{308}", "\u{378}"]),
2431 ("\u{1}\u{20}", &["\u{1}", "\u{20}"]),
2432 ("\u{1}\u{308}\u{20}", &["\u{1}", "\u{308}", "\u{20}"]),
2433 ("\u{1}\u{D}", &["\u{1}", "\u{D}"]),
2434 ("\u{1}\u{308}\u{D}", &["\u{1}", "\u{308}", "\u{D}"]),
2435 ("\u{1}\u{A}", &["\u{1}", "\u{A}"]),
2436 ("\u{1}\u{308}\u{A}", &["\u{1}", "\u{308}", "\u{A}"]),
2437 ("\u{1}\u{1}", &["\u{1}", "\u{1}"]),
2438 ("\u{1}\u{308}\u{1}", &["\u{1}", "\u{308}", "\u{1}"]),
2439 ("\u{1}\u{300}", &["\u{1}", "\u{300}"]),
2440 ("\u{1}\u{308}\u{300}", &["\u{1}", "\u{308}\u{300}"]),
2441 ("\u{1}\u{903}", &["\u{1}", "\u{903}"]),
2442 ("\u{1}\u{1100}", &["\u{1}", "\u{1100}"]),
2443 ("\u{1}\u{308}\u{1100}", &["\u{1}", "\u{308}", "\u{1100}"]),
2444 ("\u{1}\u{1160}", &["\u{1}", "\u{1160}"]),
2445 ("\u{1}\u{308}\u{1160}", &["\u{1}", "\u{308}", "\u{1160}"]),
2446 ("\u{1}\u{11A8}", &["\u{1}", "\u{11A8}"]),
2447 ("\u{1}\u{308}\u{11A8}", &["\u{1}", "\u{308}", "\u{11A8}"]),
2448 ("\u{1}\u{AC00}", &["\u{1}", "\u{AC00}"]),
2449 ("\u{1}\u{308}\u{AC00}", &["\u{1}", "\u{308}", "\u{AC00}"]),
2450 ("\u{1}\u{AC01}", &["\u{1}", "\u{AC01}"]),
2451 ("\u{1}\u{308}\u{AC01}", &["\u{1}", "\u{308}", "\u{AC01}"]),
2452 ("\u{1}\u{1F1E6}", &["\u{1}", "\u{1F1E6}"]),
2453 ("\u{1}\u{308}\u{1F1E6}", &["\u{1}", "\u{308}", "\u{1F1E6}"]),
2454 ("\u{1}\u{378}", &["\u{1}", "\u{378}"]),
2455 ("\u{1}\u{308}\u{378}", &["\u{1}", "\u{308}", "\u{378}"]),
2456 ("\u{300}\u{20}", &["\u{300}", "\u{20}"]),
2457 ("\u{300}\u{308}\u{20}", &["\u{300}\u{308}", "\u{20}"]),
2458 ("\u{300}\u{D}", &["\u{300}", "\u{D}"]),
2459 ("\u{300}\u{308}\u{D}", &["\u{300}\u{308}", "\u{D}"]),
2460 ("\u{300}\u{A}", &["\u{300}", "\u{A}"]),
2461 ("\u{300}\u{308}\u{A}", &["\u{300}\u{308}", "\u{A}"]),
2462 ("\u{300}\u{1}", &["\u{300}", "\u{1}"]),
2463 ("\u{300}\u{308}\u{1}", &["\u{300}\u{308}", "\u{1}"]),
2464 ("\u{300}\u{300}", &["\u{300}\u{300}"]),
2465 ("\u{300}\u{308}\u{300}", &["\u{300}\u{308}\u{300}"]),
2466 ("\u{300}\u{1100}", &["\u{300}", "\u{1100}"]),
2467 ("\u{300}\u{308}\u{1100}", &["\u{300}\u{308}", "\u{1100}"]),
2468 ("\u{300}\u{1160}", &["\u{300}", "\u{1160}"]),
2469 ("\u{300}\u{308}\u{1160}", &["\u{300}\u{308}", "\u{1160}"]),
2470 ("\u{300}\u{11A8}", &["\u{300}", "\u{11A8}"]),
2471 ("\u{300}\u{308}\u{11A8}", &["\u{300}\u{308}", "\u{11A8}"]),
2472 ("\u{300}\u{AC00}", &["\u{300}", "\u{AC00}"]),
2473 ("\u{300}\u{308}\u{AC00}", &["\u{300}\u{308}", "\u{AC00}"]),
2474 ("\u{300}\u{AC01}", &["\u{300}", "\u{AC01}"]),
2475 ("\u{300}\u{308}\u{AC01}", &["\u{300}\u{308}", "\u{AC01}"]),
2476 ("\u{300}\u{1F1E6}", &["\u{300}", "\u{1F1E6}"]),
2477 ("\u{300}\u{308}\u{1F1E6}", &["\u{300}\u{308}", "\u{1F1E6}"]),
2478 ("\u{300}\u{378}", &["\u{300}", "\u{378}"]),
2479 ("\u{300}\u{308}\u{378}", &["\u{300}\u{308}", "\u{378}"]),
2480 ("\u{903}\u{20}", &["\u{903}", "\u{20}"]),
2481 ("\u{903}\u{308}\u{20}", &["\u{903}\u{308}", "\u{20}"]),
2482 ("\u{903}\u{D}", &["\u{903}", "\u{D}"]),
2483 ("\u{903}\u{308}\u{D}", &["\u{903}\u{308}", "\u{D}"]),
2484 ("\u{903}\u{A}", &["\u{903}", "\u{A}"]),
2485 ("\u{903}\u{308}\u{A}", &["\u{903}\u{308}", "\u{A}"]),
2486 ("\u{903}\u{1}", &["\u{903}", "\u{1}"]),
2487 ("\u{903}\u{308}\u{1}", &["\u{903}\u{308}", "\u{1}"]),
2488 ("\u{903}\u{300}", &["\u{903}\u{300}"]),
2489 ("\u{903}\u{308}\u{300}", &["\u{903}\u{308}\u{300}"]),
2490 ("\u{903}\u{1100}", &["\u{903}", "\u{1100}"]),
2491 ("\u{903}\u{308}\u{1100}", &["\u{903}\u{308}", "\u{1100}"]),
2492 ("\u{903}\u{1160}", &["\u{903}", "\u{1160}"]),
2493 ("\u{903}\u{308}\u{1160}", &["\u{903}\u{308}", "\u{1160}"]),
2494 ("\u{903}\u{11A8}", &["\u{903}", "\u{11A8}"]),
2495 ("\u{903}\u{308}\u{11A8}", &["\u{903}\u{308}", "\u{11A8}"]),
2496 ("\u{903}\u{AC00}", &["\u{903}", "\u{AC00}"]),
2497 ("\u{903}\u{308}\u{AC00}", &["\u{903}\u{308}", "\u{AC00}"]),
2498 ("\u{903}\u{AC01}", &["\u{903}", "\u{AC01}"]),
2499 ("\u{903}\u{308}\u{AC01}", &["\u{903}\u{308}", "\u{AC01}"]),
2500 ("\u{903}\u{1F1E6}", &["\u{903}", "\u{1F1E6}"]),
2501 ("\u{903}\u{308}\u{1F1E6}", &["\u{903}\u{308}", "\u{1F1E6}"]),
2502 ("\u{903}\u{378}", &["\u{903}", "\u{378}"]),
2503 ("\u{903}\u{308}\u{378}", &["\u{903}\u{308}", "\u{378}"]),
2504 ("\u{1100}\u{20}", &["\u{1100}", "\u{20}"]),
2505 ("\u{1100}\u{308}\u{20}", &["\u{1100}\u{308}", "\u{20}"]),
2506 ("\u{1100}\u{D}", &["\u{1100}", "\u{D}"]),
2507 ("\u{1100}\u{308}\u{D}", &["\u{1100}\u{308}", "\u{D}"]),
2508 ("\u{1100}\u{A}", &["\u{1100}", "\u{A}"]),
2509 ("\u{1100}\u{308}\u{A}", &["\u{1100}\u{308}", "\u{A}"]),
2510 ("\u{1100}\u{1}", &["\u{1100}", "\u{1}"]),
2511 ("\u{1100}\u{308}\u{1}", &["\u{1100}\u{308}", "\u{1}"]),
2512 ("\u{1100}\u{300}", &["\u{1100}\u{300}"]),
2513 ("\u{1100}\u{308}\u{300}", &["\u{1100}\u{308}\u{300}"]),
2514 ("\u{1100}\u{1100}", &["\u{1100}\u{1100}"]),
2515 ("\u{1100}\u{308}\u{1100}", &["\u{1100}\u{308}", "\u{1100}"]),
2516 ("\u{1100}\u{1160}", &["\u{1100}\u{1160}"]),
2517 ("\u{1100}\u{308}\u{1160}", &["\u{1100}\u{308}", "\u{1160}"]),
2518 ("\u{1100}\u{11A8}", &["\u{1100}", "\u{11A8}"]),
2519 ("\u{1100}\u{308}\u{11A8}", &["\u{1100}\u{308}", "\u{11A8}"]),
2520 ("\u{1100}\u{AC00}", &["\u{1100}\u{AC00}"]),
2521 ("\u{1100}\u{308}\u{AC00}", &["\u{1100}\u{308}", "\u{AC00}"]),
2522 ("\u{1100}\u{AC01}", &["\u{1100}\u{AC01}"]),
2523 ("\u{1100}\u{308}\u{AC01}", &["\u{1100}\u{308}", "\u{AC01}"]),
2524 ("\u{1100}\u{1F1E6}", &["\u{1100}", "\u{1F1E6}"]),
2525 ("\u{1100}\u{308}\u{1F1E6}", &["\u{1100}\u{308}", "\u{1F1E6}"]),
2526 ("\u{1100}\u{378}", &["\u{1100}", "\u{378}"]),
2527 ("\u{1100}\u{308}\u{378}", &["\u{1100}\u{308}", "\u{378}"]),
2528 ("\u{1160}\u{20}", &["\u{1160}", "\u{20}"]),
2529 ("\u{1160}\u{308}\u{20}", &["\u{1160}\u{308}", "\u{20}"]),
2530 ("\u{1160}\u{D}", &["\u{1160}", "\u{D}"]),
2531 ("\u{1160}\u{308}\u{D}", &["\u{1160}\u{308}", "\u{D}"]),
2532 ("\u{1160}\u{A}", &["\u{1160}", "\u{A}"]),
2533 ("\u{1160}\u{308}\u{A}", &["\u{1160}\u{308}", "\u{A}"]),
2534 ("\u{1160}\u{1}", &["\u{1160}", "\u{1}"]),
2535 ("\u{1160}\u{308}\u{1}", &["\u{1160}\u{308}", "\u{1}"]),
2536 ("\u{1160}\u{300}", &["\u{1160}\u{300}"]),
2537 ("\u{1160}\u{308}\u{300}", &["\u{1160}\u{308}\u{300}"]),
2538 ("\u{1160}\u{1100}", &["\u{1160}", "\u{1100}"]),
2539 ("\u{1160}\u{308}\u{1100}", &["\u{1160}\u{308}", "\u{1100}"]),
2540 ("\u{1160}\u{1160}", &["\u{1160}\u{1160}"]),
2541 ("\u{1160}\u{308}\u{1160}", &["\u{1160}\u{308}", "\u{1160}"]),
2542 ("\u{1160}\u{11A8}", &["\u{1160}\u{11A8}"]),
2543 ("\u{1160}\u{308}\u{11A8}", &["\u{1160}\u{308}", "\u{11A8}"]),
2544 ("\u{1160}\u{AC00}", &["\u{1160}", "\u{AC00}"]),
2545 ("\u{1160}\u{308}\u{AC00}", &["\u{1160}\u{308}", "\u{AC00}"]),
2546 ("\u{1160}\u{AC01}", &["\u{1160}", "\u{AC01}"]),
2547 ("\u{1160}\u{308}\u{AC01}", &["\u{1160}\u{308}", "\u{AC01}"]),
2548 ("\u{1160}\u{1F1E6}", &["\u{1160}", "\u{1F1E6}"]),
2549 ("\u{1160}\u{308}\u{1F1E6}", &["\u{1160}\u{308}", "\u{1F1E6}"]),
2550 ("\u{1160}\u{378}", &["\u{1160}", "\u{378}"]),
2551 ("\u{1160}\u{308}\u{378}", &["\u{1160}\u{308}", "\u{378}"]),
2552 ("\u{11A8}\u{20}", &["\u{11A8}", "\u{20}"]),
2553 ("\u{11A8}\u{308}\u{20}", &["\u{11A8}\u{308}", "\u{20}"]),
2554 ("\u{11A8}\u{D}", &["\u{11A8}", "\u{D}"]),
2555 ("\u{11A8}\u{308}\u{D}", &["\u{11A8}\u{308}", "\u{D}"]),
2556 ("\u{11A8}\u{A}", &["\u{11A8}", "\u{A}"]),
2557 ("\u{11A8}\u{308}\u{A}", &["\u{11A8}\u{308}", "\u{A}"]),
2558 ("\u{11A8}\u{1}", &["\u{11A8}", "\u{1}"]),
2559 ("\u{11A8}\u{308}\u{1}", &["\u{11A8}\u{308}", "\u{1}"]),
2560 ("\u{11A8}\u{300}", &["\u{11A8}\u{300}"]),
2561 ("\u{11A8}\u{308}\u{300}", &["\u{11A8}\u{308}\u{300}"]),
2562 ("\u{11A8}\u{1100}", &["\u{11A8}", "\u{1100}"]),
2563 ("\u{11A8}\u{308}\u{1100}", &["\u{11A8}\u{308}", "\u{1100}"]),
2564 ("\u{11A8}\u{1160}", &["\u{11A8}", "\u{1160}"]),
2565 ("\u{11A8}\u{308}\u{1160}", &["\u{11A8}\u{308}", "\u{1160}"]),
2566 ("\u{11A8}\u{11A8}", &["\u{11A8}\u{11A8}"]),
2567 ("\u{11A8}\u{308}\u{11A8}", &["\u{11A8}\u{308}", "\u{11A8}"]),
2568 ("\u{11A8}\u{AC00}", &["\u{11A8}", "\u{AC00}"]),
2569 ("\u{11A8}\u{308}\u{AC00}", &["\u{11A8}\u{308}", "\u{AC00}"]),
2570 ("\u{11A8}\u{AC01}", &["\u{11A8}", "\u{AC01}"]),
2571 ("\u{11A8}\u{308}\u{AC01}", &["\u{11A8}\u{308}", "\u{AC01}"]),
2572 ("\u{11A8}\u{1F1E6}", &["\u{11A8}", "\u{1F1E6}"]),
2573 ("\u{11A8}\u{308}\u{1F1E6}", &["\u{11A8}\u{308}", "\u{1F1E6}"]),
2574 ("\u{11A8}\u{378}", &["\u{11A8}", "\u{378}"]),
2575 ("\u{11A8}\u{308}\u{378}", &["\u{11A8}\u{308}", "\u{378}"]),
2576 ("\u{AC00}\u{20}", &["\u{AC00}", "\u{20}"]),
2577 ("\u{AC00}\u{308}\u{20}", &["\u{AC00}\u{308}", "\u{20}"]),
2578 ("\u{AC00}\u{D}", &["\u{AC00}", "\u{D}"]),
2579 ("\u{AC00}\u{308}\u{D}", &["\u{AC00}\u{308}", "\u{D}"]),
2580 ("\u{AC00}\u{A}", &["\u{AC00}", "\u{A}"]),
2581 ("\u{AC00}\u{308}\u{A}", &["\u{AC00}\u{308}", "\u{A}"]),
2582 ("\u{AC00}\u{1}", &["\u{AC00}", "\u{1}"]),
2583 ("\u{AC00}\u{308}\u{1}", &["\u{AC00}\u{308}", "\u{1}"]),
2584 ("\u{AC00}\u{300}", &["\u{AC00}\u{300}"]),
2585 ("\u{AC00}\u{308}\u{300}", &["\u{AC00}\u{308}\u{300}"]),
2586 ("\u{AC00}\u{1100}", &["\u{AC00}", "\u{1100}"]),
2587 ("\u{AC00}\u{308}\u{1100}", &["\u{AC00}\u{308}", "\u{1100}"]),
2588 ("\u{AC00}\u{1160}", &["\u{AC00}\u{1160}"]),
2589 ("\u{AC00}\u{308}\u{1160}", &["\u{AC00}\u{308}", "\u{1160}"]),
2590 ("\u{AC00}\u{11A8}", &["\u{AC00}\u{11A8}"]),
2591 ("\u{AC00}\u{308}\u{11A8}", &["\u{AC00}\u{308}", "\u{11A8}"]),
2592 ("\u{AC00}\u{AC00}", &["\u{AC00}", "\u{AC00}"]),
2593 ("\u{AC00}\u{308}\u{AC00}", &["\u{AC00}\u{308}", "\u{AC00}"]),
2594 ("\u{AC00}\u{AC01}", &["\u{AC00}", "\u{AC01}"]),
2595 ("\u{AC00}\u{308}\u{AC01}", &["\u{AC00}\u{308}", "\u{AC01}"]),
2596 ("\u{AC00}\u{1F1E6}", &["\u{AC00}", "\u{1F1E6}"]),
2597 ("\u{AC00}\u{308}\u{1F1E6}", &["\u{AC00}\u{308}", "\u{1F1E6}"]),
2598 ("\u{AC00}\u{378}", &["\u{AC00}", "\u{378}"]),
2599 ("\u{AC00}\u{308}\u{378}", &["\u{AC00}\u{308}", "\u{378}"]),
2600 ("\u{AC01}\u{20}", &["\u{AC01}", "\u{20}"]),
2601 ("\u{AC01}\u{308}\u{20}", &["\u{AC01}\u{308}", "\u{20}"]),
2602 ("\u{AC01}\u{D}", &["\u{AC01}", "\u{D}"]),
2603 ("\u{AC01}\u{308}\u{D}", &["\u{AC01}\u{308}", "\u{D}"]),
2604 ("\u{AC01}\u{A}", &["\u{AC01}", "\u{A}"]),
2605 ("\u{AC01}\u{308}\u{A}", &["\u{AC01}\u{308}", "\u{A}"]),
2606 ("\u{AC01}\u{1}", &["\u{AC01}", "\u{1}"]),
2607 ("\u{AC01}\u{308}\u{1}", &["\u{AC01}\u{308}", "\u{1}"]),
2608 ("\u{AC01}\u{300}", &["\u{AC01}\u{300}"]),
2609 ("\u{AC01}\u{308}\u{300}", &["\u{AC01}\u{308}\u{300}"]),
2610 ("\u{AC01}\u{1100}", &["\u{AC01}", "\u{1100}"]),
2611 ("\u{AC01}\u{308}\u{1100}", &["\u{AC01}\u{308}", "\u{1100}"]),
2612 ("\u{AC01}\u{1160}", &["\u{AC01}", "\u{1160}"]),
2613 ("\u{AC01}\u{308}\u{1160}", &["\u{AC01}\u{308}", "\u{1160}"]),
2614 ("\u{AC01}\u{11A8}", &["\u{AC01}\u{11A8}"]),
2615 ("\u{AC01}\u{308}\u{11A8}", &["\u{AC01}\u{308}", "\u{11A8}"]),
2616 ("\u{AC01}\u{AC00}", &["\u{AC01}", "\u{AC00}"]),
2617 ("\u{AC01}\u{308}\u{AC00}", &["\u{AC01}\u{308}", "\u{AC00}"]),
2618 ("\u{AC01}\u{AC01}", &["\u{AC01}", "\u{AC01}"]),
2619 ("\u{AC01}\u{308}\u{AC01}", &["\u{AC01}\u{308}", "\u{AC01}"]),
2620 ("\u{AC01}\u{1F1E6}", &["\u{AC01}", "\u{1F1E6}"]),
2621 ("\u{AC01}\u{308}\u{1F1E6}", &["\u{AC01}\u{308}", "\u{1F1E6}"]),
2622 ("\u{AC01}\u{378}", &["\u{AC01}", "\u{378}"]),
2623 ("\u{AC01}\u{308}\u{378}", &["\u{AC01}\u{308}", "\u{378}"]),
2624 ("\u{1F1E6}\u{20}", &["\u{1F1E6}", "\u{20}"]),
2625 ("\u{1F1E6}\u{308}\u{20}", &["\u{1F1E6}\u{308}", "\u{20}"]),
2626 ("\u{1F1E6}\u{D}", &["\u{1F1E6}", "\u{D}"]),
2627 ("\u{1F1E6}\u{308}\u{D}", &["\u{1F1E6}\u{308}", "\u{D}"]),
2628 ("\u{1F1E6}\u{A}", &["\u{1F1E6}", "\u{A}"]),
2629 ("\u{1F1E6}\u{308}\u{A}", &["\u{1F1E6}\u{308}", "\u{A}"]),
2630 ("\u{1F1E6}\u{1}", &["\u{1F1E6}", "\u{1}"]),
2631 ("\u{1F1E6}\u{308}\u{1}", &["\u{1F1E6}\u{308}", "\u{1}"]),
2632 ("\u{1F1E6}\u{300}", &["\u{1F1E6}\u{300}"]),
2633 ("\u{1F1E6}\u{308}\u{300}", &["\u{1F1E6}\u{308}\u{300}"]),
2634 ("\u{1F1E6}\u{1100}", &["\u{1F1E6}", "\u{1100}"]),
2635 ("\u{1F1E6}\u{308}\u{1100}", &["\u{1F1E6}\u{308}", "\u{1100}"]),
2636 ("\u{1F1E6}\u{1160}", &["\u{1F1E6}", "\u{1160}"]),
2637 ("\u{1F1E6}\u{308}\u{1160}", &["\u{1F1E6}\u{308}", "\u{1160}"]),
2638 ("\u{1F1E6}\u{11A8}", &["\u{1F1E6}", "\u{11A8}"]),
2639 ("\u{1F1E6}\u{308}\u{11A8}", &["\u{1F1E6}\u{308}", "\u{11A8}"]),
2640 ("\u{1F1E6}\u{AC00}", &["\u{1F1E6}", "\u{AC00}"]),
2641 ("\u{1F1E6}\u{308}\u{AC00}", &["\u{1F1E6}\u{308}", "\u{AC00}"]),
2642 ("\u{1F1E6}\u{AC01}", &["\u{1F1E6}", "\u{AC01}"]),
2643 ("\u{1F1E6}\u{308}\u{AC01}", &["\u{1F1E6}\u{308}", "\u{AC01}"]),
2644 ("\u{1F1E6}\u{1F1E6}", &["\u{1F1E6}\u{1F1E6}"]),
2645 ("\u{1F1E6}\u{308}\u{1F1E6}", &["\u{1F1E6}\u{308}", "\u{1F1E6}"]),
2646 ("\u{1F1E6}\u{378}", &["\u{1F1E6}", "\u{378}"]),
2647 ("\u{1F1E6}\u{308}\u{378}", &["\u{1F1E6}\u{308}", "\u{378}"]),
2648 ("\u{378}\u{20}", &["\u{378}", "\u{20}"]),
2649 ("\u{378}\u{308}\u{20}", &["\u{378}\u{308}", "\u{20}"]),
2650 ("\u{378}\u{D}", &["\u{378}", "\u{D}"]),
2651 ("\u{378}\u{308}\u{D}", &["\u{378}\u{308}", "\u{D}"]),
2652 ("\u{378}\u{A}", &["\u{378}", "\u{A}"]),
2653 ("\u{378}\u{308}\u{A}", &["\u{378}\u{308}", "\u{A}"]),
2654 ("\u{378}\u{1}", &["\u{378}", "\u{1}"]),
2655 ("\u{378}\u{308}\u{1}", &["\u{378}\u{308}", "\u{1}"]),
2656 ("\u{378}\u{300}", &["\u{378}\u{300}"]),
2657 ("\u{378}\u{308}\u{300}", &["\u{378}\u{308}\u{300}"]),
2658 ("\u{378}\u{1100}", &["\u{378}", "\u{1100}"]),
2659 ("\u{378}\u{308}\u{1100}", &["\u{378}\u{308}", "\u{1100}"]),
2660 ("\u{378}\u{1160}", &["\u{378}", "\u{1160}"]),
2661 ("\u{378}\u{308}\u{1160}", &["\u{378}\u{308}", "\u{1160}"]),
2662 ("\u{378}\u{11A8}", &["\u{378}", "\u{11A8}"]),
2663 ("\u{378}\u{308}\u{11A8}", &["\u{378}\u{308}", "\u{11A8}"]),
2664 ("\u{378}\u{AC00}", &["\u{378}", "\u{AC00}"]),
2665 ("\u{378}\u{308}\u{AC00}", &["\u{378}\u{308}", "\u{AC00}"]),
2666 ("\u{378}\u{AC01}", &["\u{378}", "\u{AC01}"]),
2667 ("\u{378}\u{308}\u{AC01}", &["\u{378}\u{308}", "\u{AC01}"]),
2668 ("\u{378}\u{1F1E6}", &["\u{378}", "\u{1F1E6}"]),
2669 ("\u{378}\u{308}\u{1F1E6}", &["\u{378}\u{308}", "\u{1F1E6}"]),
2670 ("\u{378}\u{378}", &["\u{378}", "\u{378}"]),
2671 ("\u{378}\u{308}\u{378}", &["\u{378}\u{308}", "\u{378}"]),
2672 ("\u{61}\u{1F1E6}\u{62}", &["\u{61}", "\u{1F1E6}", "\u{62}"]),
2673 ("\u{1F1F7}\u{1F1FA}", &["\u{1F1F7}\u{1F1FA}"]),
2674 ("\u{1F1F7}\u{1F1FA}\u{1F1F8}", &["\u{1F1F7}\u{1F1FA}\u{1F1F8}"]),
2675 ("\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}",
2676 &["\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}"]),
2677 ("\u{1F1F7}\u{1F1FA}\u{200B}\u{1F1F8}\u{1F1EA}",
2678 &["\u{1F1F7}\u{1F1FA}", "\u{200B}", "\u{1F1F8}\u{1F1EA}"]),
2679 ("\u{1F1E6}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{1F1E7}\u{1F1E8}"]),
2680 ("\u{1F1E6}\u{200D}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{200D}",
2681 "\u{1F1E7}\u{1F1E8}"]),
2682 ("\u{1F1E6}\u{1F1E7}\u{200D}\u{1F1E8}",
2683 &["\u{1F1E6}\u{1F1E7}\u{200D}", "\u{1F1E8}"]),
2684 ("\u{20}\u{200D}\u{646}", &["\u{20}\u{200D}", "\u{646}"]),
2685 ("\u{646}\u{200D}\u{20}", &["\u{646}\u{200D}", "\u{20}"]),
2688 let test_diff: [(_, &[_], &[_]); 23] = [
2689 ("\u{20}\u{903}", &["\u{20}\u{903}"], &["\u{20}", "\u{903}"]), ("\u{20}\u{308}\u{903}",
2690 &["\u{20}\u{308}\u{903}"], &["\u{20}\u{308}", "\u{903}"]), ("\u{D}\u{308}\u{903}",
2691 &["\u{D}", "\u{308}\u{903}"], &["\u{D}", "\u{308}", "\u{903}"]), ("\u{A}\u{308}\u{903}",
2692 &["\u{A}", "\u{308}\u{903}"], &["\u{A}", "\u{308}", "\u{903}"]), ("\u{1}\u{308}\u{903}",
2693 &["\u{1}", "\u{308}\u{903}"], &["\u{1}", "\u{308}", "\u{903}"]), ("\u{300}\u{903}",
2694 &["\u{300}\u{903}"], &["\u{300}", "\u{903}"]), ("\u{300}\u{308}\u{903}",
2695 &["\u{300}\u{308}\u{903}"], &["\u{300}\u{308}", "\u{903}"]), ("\u{903}\u{903}",
2696 &["\u{903}\u{903}"], &["\u{903}", "\u{903}"]), ("\u{903}\u{308}\u{903}",
2697 &["\u{903}\u{308}\u{903}"], &["\u{903}\u{308}", "\u{903}"]), ("\u{1100}\u{903}",
2698 &["\u{1100}\u{903}"], &["\u{1100}", "\u{903}"]), ("\u{1100}\u{308}\u{903}",
2699 &["\u{1100}\u{308}\u{903}"], &["\u{1100}\u{308}", "\u{903}"]), ("\u{1160}\u{903}",
2700 &["\u{1160}\u{903}"], &["\u{1160}", "\u{903}"]), ("\u{1160}\u{308}\u{903}",
2701 &["\u{1160}\u{308}\u{903}"], &["\u{1160}\u{308}", "\u{903}"]), ("\u{11A8}\u{903}",
2702 &["\u{11A8}\u{903}"], &["\u{11A8}", "\u{903}"]), ("\u{11A8}\u{308}\u{903}",
2703 &["\u{11A8}\u{308}\u{903}"], &["\u{11A8}\u{308}", "\u{903}"]), ("\u{AC00}\u{903}",
2704 &["\u{AC00}\u{903}"], &["\u{AC00}", "\u{903}"]), ("\u{AC00}\u{308}\u{903}",
2705 &["\u{AC00}\u{308}\u{903}"], &["\u{AC00}\u{308}", "\u{903}"]), ("\u{AC01}\u{903}",
2706 &["\u{AC01}\u{903}"], &["\u{AC01}", "\u{903}"]), ("\u{AC01}\u{308}\u{903}",
2707 &["\u{AC01}\u{308}\u{903}"], &["\u{AC01}\u{308}", "\u{903}"]), ("\u{1F1E6}\u{903}",
2708 &["\u{1F1E6}\u{903}"], &["\u{1F1E6}", "\u{903}"]), ("\u{1F1E6}\u{308}\u{903}",
2709 &["\u{1F1E6}\u{308}\u{903}"], &["\u{1F1E6}\u{308}", "\u{903}"]), ("\u{378}\u{903}",
2710 &["\u{378}\u{903}"], &["\u{378}", "\u{903}"]), ("\u{378}\u{308}\u{903}",
2711 &["\u{378}\u{308}\u{903}"], &["\u{378}\u{308}", "\u{903}"]),
2714 for &(s, g) in test_same.iter() {
2715 // test forward iterator
2716 assert!(order::equals(s.graphemes(true), g.iter().map(|&x| x)));
2717 assert!(order::equals(s.graphemes(false), g.iter().map(|&x| x)));
2719 // test reverse iterator
2720 assert!(order::equals(s.graphemes(true).rev(), g.iter().rev().map(|&x| x)));
2721 assert!(order::equals(s.graphemes(false).rev(), g.iter().rev().map(|&x| x)));
2724 for &(s, gt, gf) in test_diff.iter() {
2725 // test forward iterator
2726 assert!(order::equals(s.graphemes(true), gt.iter().map(|&x| x)));
2727 assert!(order::equals(s.graphemes(false), gf.iter().map(|&x| x)));
2729 // test reverse iterator
2730 assert!(order::equals(s.graphemes(true).rev(), gt.iter().rev().map(|&x| x)));
2731 assert!(order::equals(s.graphemes(false).rev(), gf.iter().rev().map(|&x| x)));
2734 // test the indices iterators
2735 let s = "a̐éö̲\r\n";
2736 let gr_inds = s.grapheme_indices(true).collect::<Vec<(uint, &str)>>();
2737 let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
2738 assert_eq!(gr_inds, b);
2739 let gr_inds = s.grapheme_indices(true).rev().collect::<Vec<(uint, &str)>>();
2740 let b: &[_] = &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0u, "a̐")];
2741 assert_eq!(gr_inds, b);
2742 let mut gr_inds_iter = s.grapheme_indices(true);
2744 let gr_inds = gr_inds_iter.by_ref();
2745 let e1 = gr_inds.size_hint();
2746 assert_eq!(e1, (1, Some(13)));
2747 let c = gr_inds.count();
2750 let e2 = gr_inds_iter.size_hint();
2751 assert_eq!(e2, (0, Some(0)));
2753 // make sure the reverse iterator does the right thing with "\n" at beginning of string
2755 let gr = s.graphemes(true).rev().collect::<Vec<&str>>();
2756 let b: &[_] = &["\r", "\r\n", "\n"];
// Exercises `split_str` (substring-separator splitting).
// Covers: separator absent, interior/leading/trailing separators,
// multi-byte (non-ASCII) text and separator, and non-overlapping
// matching of repeated separator characters.
// NOTE(review): this extract omits the tail of helper `t` (original
// lines 2764-2765, presumably the comparison of `v` against `u` and a
// closing brace) — confirm against the full file.
2761 fn test_split_strator() {
// Helper: split `s` on `sep` and compare the collected pieces to `u`.
2762 fn t(s: &str, sep: &str, u: &[&str]) {
2763 let v: Vec<&str> = s.split_str(sep).collect();
// Separator not present: the whole input comes back as one piece.
2766 t("--1233345--", "12345", &["--1233345--"]);
2767 t("abc::hello::there", "::", &["abc", "hello", "there"]);
// Leading / trailing separators produce empty edge fields.
2768 t("::hello::there", "::", &["", "hello", "there"]);
2769 t("hello::there::", "::", &["hello", "there", ""]);
2770 t("::hello::there::", "::", &["", "hello", "there", ""]);
// Multi-byte Thai/Chinese content with a multi-byte separator.
2771 t("ประเทศไทย中华Việt Nam", "中华", &["ประเทศไทย", "Việt Nam"]);
2772 t("zzXXXzzYYYzz", "zz", &["", "XXX", "YYY", ""]);
2773 t("zzXXXzYYYz", "XXX", &["zz", "zYYYz"]);
2774 t(".XXX.YYY.", ".", &["", "XXX", "YYY", ""]);
// Input equal to the separator splits into two empty fields.
2776 t("zz", "zz", &["",""]);
2777 t("ok", "z", &["ok"]);
// Non-overlapping matching: "zzz" contains one "zz" match, leaving "z".
2778 t("zzz", "zz", &["","z"]);
2779 t("zzzzz", "zz", &["","","z"]);
// `Default` for string-like types must produce the empty string.
// NOTE(review): the instantiations of `t` (e.g. for &str / String) and
// the closing braces are not in this extract — confirm against the
// full file.
2783 fn test_str_default() {
2784 use core::default::Default;
// Generic over any S implementing both Default and Str.
2785 fn t<S: Default + Str>() {
2786 let s: S = Default::default();
2787 assert_eq!(s.as_slice(), "");
// The summed byte lengths of string-slice pieces must equal the byte
// length of the original "01234" (5), whether the pieces are literals,
// slices borrowed from temporary `String`s, or one full slice.
2795 fn test_str_container() {
// Helper: total byte length of all slices in `v`.
2796 fn sum_len(v: &[&str]) -> uint {
2797 v.iter().map(|x| x.len()).sum()
2800 let s = String::from_str("01234");
// Pieces of the literal, including an empty piece, still sum to 5.
2801 assert_eq!(5, sum_len(&["012", "", "34"]));
// Same, with each piece borrowed from an owned String.
2802 assert_eq!(5, sum_len(&[String::from_str("01").as_slice(),
2803 String::from_str("2").as_slice(),
2804 String::from_str("34").as_slice(),
2805 String::from_str("").as_slice()]));
2806 assert_eq!(5, sum_len(&[s.as_slice()]));
// `from_utf8` accepts valid ASCII and valid multi-byte UTF-8, and
// rejects input ending in the invalid byte 0xFF.
2810 fn test_str_from_utf8() {
// NOTE(review): the binding of the first `xs` (original line 2811,
// presumably b"hello") is not in this extract — confirm against the
// full file.
2812 assert_eq!(from_utf8(xs), Ok("hello"));
// Multi-byte Thai/Chinese/Vietnamese text round-trips unchanged.
2814 let xs = "ศไทย中华Việt Nam".as_bytes();
2815 assert_eq!(from_utf8(xs), Ok("ศไทย中华Việt Nam"));
// A trailing 0xFF is invalid UTF-8; this era of the API reports it as
// `Utf8Error::TooShort`.
2817 let xs = b"hello\xFF";
2818 assert_eq!(from_utf8(xs), Err(Utf8Error::TooShort));
2825 use prelude::{SliceExt, IteratorExt, SliceConcatExt};
2827 use test::black_box;
// Bench: count chars of mixed Thai/Chinese/Vietnamese/ASCII text via
// the forward `chars()` iterator.
2830 fn char_iterator(b: &mut Bencher) {
2831 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2833 b.iter(|| s.chars().count());
// Bench: same char iteration, but through a `for` loop; each char is
// passed to `black_box` so the loop body cannot be optimized away.
2837 fn char_iterator_for(b: &mut Bencher) {
2838 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2841 for ch in s.chars() { black_box(ch) }
// Bench: `chars().count()` over pure-ASCII multi-line text, to compare
// against the mixed-script case. The string literal below is runtime
// data — reproduced verbatim.
2846 fn char_iterator_ascii(b: &mut Bencher) {
2847 let s = "Mary had a little lamb, Little lamb
2848 Mary had a little lamb, Little lamb
2849 Mary had a little lamb, Little lamb
2850 Mary had a little lamb, Little lamb
2851 Mary had a little lamb, Little lamb
2852 Mary had a little lamb, Little lamb";
2854 b.iter(|| s.chars().count());
// Bench: count chars iterating in reverse (`chars().rev()`).
2858 fn char_iterator_rev(b: &mut Bencher) {
2859 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2861 b.iter(|| s.chars().rev().count());
// Bench: reverse char iteration via a `for` loop, black-boxing each
// char to keep the loop from being optimized out.
2865 fn char_iterator_rev_for(b: &mut Bencher) {
2866 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2869 for ch in s.chars().rev() { black_box(ch) }
// Bench: `char_indices()` must yield exactly as many items as
// `chars()`; the expected count is computed once outside the timed
// closure.
2874 fn char_indicesator(b: &mut Bencher) {
2875 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2876 let len = s.chars().count();
2878 b.iter(|| assert_eq!(s.char_indices().count(), len));
// Bench: reverse `char_indices()` iteration yields the same count as
// forward `chars()`.
2882 fn char_indicesator_rev(b: &mut Bencher) {
2883 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2884 let len = s.chars().count();
2886 b.iter(|| assert_eq!(s.char_indices().rev().count(), len));
// Bench: split non-ASCII text on an ASCII char pattern. 'V' occurs
// twice (once per "Việt"), so the split yields 3 pieces.
2890 fn split_unicode_ascii(b: &mut Bencher) {
2891 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2893 b.iter(|| assert_eq!(s.split('V').count(), 3));
// Bench: like `split_unicode_ascii`, but the pattern is wrapped in a
// custom `CharEq` whose `only_ascii()` returns false, forcing the
// general (non-ASCII-fast-path) split code path.
// NOTE(review): the body of `matches` past the destructuring (original
// lines 2902-2903, presumably `cc == c` plus a closing brace) is not
// in this extract — confirm against the full file.
2897 fn split_unicode_not_ascii(b: &mut Bencher) {
2898 struct NotAscii(char);
2899 impl CharEq for NotAscii {
2900 fn matches(&mut self, c: char) -> bool {
2901 let NotAscii(cc) = *self;
// Deliberately claim non-ASCII so the splitter cannot take the
// byte-oriented fast path.
2904 fn only_ascii(&self) -> bool { false }
2906 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2908 b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
// Bench: split ASCII text on a plain `char` pattern (' '); the
// expected piece count is computed once outside the timed closure.
2913 fn split_ascii(b: &mut Bencher) {
2914 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2915 let len = s.split(' ').count();
2917 b.iter(|| assert_eq!(s.split(' ').count(), len));
// Bench: split ASCII text with a custom `CharEq` whose `only_ascii()`
// returns false, to measure the general split path on ASCII input
// (compare against `split_ascii`).
// NOTE(review): the body of `matches` past the destructuring (original
// lines 2927-2928, presumably `cc == c` plus a closing brace) is not
// in this extract — confirm against the full file.
2921 fn split_not_ascii(b: &mut Bencher) {
2922 struct NotAscii(char);
2923 impl CharEq for NotAscii {
2925 fn matches(&mut self, c: char) -> bool {
2926 let NotAscii(cc) = *self;
// Deliberately claim non-ASCII to defeat the byte-oriented fast path.
2929 fn only_ascii(&self) -> bool { false }
2931 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2932 let len = s.split(' ').count();
2934 b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
// Bench: split using a named function pointer as the predicate, to
// compare against char-literal and closure patterns.
2938 fn split_extern_fn(b: &mut Bencher) {
2939 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2940 let len = s.split(' ').count();
// Predicate equivalent to matching ' '.
2941 fn pred(c: char) -> bool { c == ' ' }
2943 b.iter(|| assert_eq!(s.split(pred).count(), len));
// Bench: split using an inline closure predicate (old explicit
// unboxed-closure syntax `|&: ...|`) matching ' '.
2947 fn split_closure(b: &mut Bencher) {
2948 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2949 let len = s.split(' ').count();
2951 b.iter(|| assert_eq!(s.split(|&: c: char| c == ' ').count(), len));
// Bench: split using a `&[char]` slice pattern containing only ' ',
// to compare against the single-char pattern forms.
2955 fn split_slice(b: &mut Bencher) {
2956 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2957 let len = s.split(' ').count();
2959 let c: &[char] = &[' '];
2960 b.iter(|| assert_eq!(s.split(c).count(), len));
// Bench: `connect` (pre-1.0 name for `join`) over 10 copies of a mixed
// script string; the asserted length is 10 strings plus 9 separators.
// NOTE(review): the binding of `sep` and the `b.iter(...)` wrapper
// around the assert (original lines 2966/2968) are not in this
// extract — confirm against the full file.
2964 fn bench_connect(b: &mut Bencher) {
2965 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2967 let v = vec![s, s, s, s, s, s, s, s, s, s];
2969 assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
// Bench: substring search with a short haystack and short needle;
// asserts the needle is found.
// NOTE(review): the binding of `needle` and the `b.iter(...)` wrapper
// (original lines 2976-2978) are not in this extract — confirm
// against the full file.
2974 fn bench_contains_short_short(b: &mut Bencher) {
2975 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2979 assert!(haystack.contains(needle));
// Bench: substring search for a short needle ("english") in a long
// lorem-ipsum haystack where it does NOT occur — measures the full
// scan-to-failure cost. The literal below is runtime data and is
// reproduced verbatim (trailing backslashes are line continuations).
// NOTE(review): the opening `let haystack = "...` line (original 2985)
// and the `b.iter(...)` wrapper around the final assert are not in
// this extract — confirm against the full file.
2984 fn bench_contains_short_long(b: &mut Bencher) {
2986 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
2987 ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
2988 eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
2989 sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
2990 tempus vel, gravida nec quam.
2992 In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
2993 sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
2994 diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
2995 lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
2996 eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
2997 interdum. Curabitur ut nisi justo.
2999 Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
3000 mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
3001 lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
3002 est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
3003 felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
3004 ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
3005 feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
3006 Aliquam sit amet placerat lorem.
3008 Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
3009 mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
3010 Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
3011 lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
3012 suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
3015 Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
3016 feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
3017 vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
3018 leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
3019 malesuada sollicitudin quam eu fermentum.";
3020 let needle = "english";
3023 assert!(!haystack.contains(needle));
// Bench: adversarial input for substring search — a haystack of all
// 'a's and a needle of 'a's ending in 'b'. Every candidate position
// almost matches, which is the classic worst case for naive O(n*m)
// search; asserts the needle is absent.
// NOTE(review): the `b.iter(...)` wrapper around the assert (original
// lines 3031-3032) is not in this extract — confirm against the full
// file.
3028 fn bench_contains_bad_naive(b: &mut Bencher) {
3029 let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
3030 let needle = "aaaaaaaab";
3033 assert!(!haystack.contains(needle));
3038 fn bench_contains_equal(b: &mut Bencher) {
3039 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3040 let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3043 assert!(haystack.contains(needle));