1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 // ignore-lexer-test FIXME #15679
13 //! Unicode string manipulation (`str` type)
17 //! Rust's string type is one of the core primitive types of the language. While
18 //! represented by the name `str`, the name `str` is not actually a valid type in
19 //! Rust. Each string must also be decorated with a pointer. `String` is used
20 //! for an owned string, so there is only one commonly-used `str` type in Rust:
23 //! `&str` is the borrowed string type. This type of string can only be created
24 //! from other strings, unless it is a static string (see below). As the word
25 //! "borrowed" implies, this type of string is owned elsewhere, and this string
26 //! cannot be moved out of.
28 //! As an example, here's some code that uses a string.
32 //! let borrowed_string = "This string is borrowed with the 'static lifetime";
36 //! From the example above, you can guess that Rust's string literals have the
37 //! `'static` lifetime. This is akin to C's concept of a static string.
38 //! More precisely, string literals are immutable views with a `'static` lifetime
39 //! (otherwise known as the lifetime of the entire program), and thus have the
40 //! type `&'static str`.
44 //! Rust's string type, `str`, is a sequence of Unicode scalar values encoded as a
45 //! stream of UTF-8 bytes. All [strings](../../reference.html#literals) are
46 //! guaranteed to be validly encoded UTF-8 sequences. Additionally, strings are
47 //! not null-terminated and can thus contain null bytes.
49 //! The actual representation of strings has a direct mapping to slices: `&str`
50 //! is the same as `&[u8]`.
52 #![doc(primitive = "str")]
54 use self::RecompositionState::*;
55 use self::DecompositionType::*;
57 use core::borrow::{BorrowFrom, ToOwned};
58 use core::char::CharExt;
59 use core::clone::Clone;
60 use core::iter::AdditiveIterator;
61 use core::iter::{range, Iterator, IteratorExt};
63 use core::option::Option::{self, Some, None};
64 use core::slice::AsSlice;
65 use core::str as core_str;
66 use unicode::str::{UnicodeStr, Utf16Encoder};
68 use ring_buf::RingBuf;
73 use slice::SliceConcatExt;
75 pub use core::str::{FromStr, Utf8Error, Str};
76 pub use core::str::{Lines, LinesAny, MatchIndices, SplitStr, CharRange};
77 pub use core::str::{Split, SplitTerminator};
78 pub use core::str::{SplitN, RSplitN};
79 pub use core::str::{from_utf8, CharEq, Chars, CharIndices, Bytes};
80 pub use core::str::{from_utf8_unchecked, from_c_str};
81 pub use unicode::str::{Words, Graphemes, GraphemeIndices};
84 Section: Creating a string
87 impl<S: Str> SliceConcatExt<str, String> for [S] {
88 fn concat(&self) -> String {
89 let s = self.as_slice();
95 // `len` calculation may overflow but push_str will check boundaries
96 let len = s.iter().map(|s| s.as_slice().len()).sum();
97 let mut result = String::with_capacity(len);
100 result.push_str(s.as_slice())
106 fn connect(&self, sep: &str) -> String {
107 let s = self.as_slice();
110 return String::new();
118 // this is wrong without the guarantee that `self` is non-empty
119 // `len` calculation may overflow but push_str will check boundaries
120 let len = sep.len() * (s.len() - 1)
121 + s.iter().map(|s| s.as_slice().len()).sum();
122 let mut result = String::with_capacity(len);
123 let mut first = true;
129 result.push_str(sep);
131 result.push_str(s.as_slice());
141 // Helper functions used for Unicode normalization
// Orders the `(char, combining class)` pairs in `comb` by repeatedly comparing
// adjacent entries. A pair is considered out of order only when both classes
// are non-zero and the left class is greater than the right one, so an entry
// with class 0 is never treated as out of order with its neighbour.
// NOTE(review): the statements executed for an out-of-order pair (presumably a
// swap that also sets `swapped`) are not visible in this excerpt — confirm.
142 fn canonical_sort(comb: &mut [(char, u8)]) {
143 let len = comb.len();
144 for i in range(0, len) {
// Presumably set when this pass reorders a pair; it is tested after the
// inner loop to decide whether another pass is needed.
145 let mut swapped = false;
// The scanned range shrinks by one element on every outer iteration.
146 for j in range(1, len-i) {
147 let class_a = comb[j-1].1;
148 let class_b = comb[j].1;
149 if class_a != 0 && class_b != 0 && class_a > class_b {
// Stop early once a pass leaves `swapped` unset.
154 if !swapped { break; }
159 enum DecompositionType {
164 /// External iterator for a string's decomposition's characters.
165 /// Use with the `std::iter` module.
167 pub struct Decompositions<'a> {
168 kind: DecompositionType,
170 buffer: Vec<(char, u8)>,
174 impl<'a> Iterator for Decompositions<'a> {
178 fn next(&mut self) -> Option<char> {
179 match self.buffer.first() {
182 self.buffer.remove(0);
185 Some(&(c, _)) if self.sorted => {
186 self.buffer.remove(0);
189 _ => self.sorted = false
193 for ch in self.iter {
194 let buffer = &mut self.buffer;
195 let sorted = &mut self.sorted;
197 let callback = |&mut: d| {
199 unicode::char::canonical_combining_class(d);
200 if class == 0 && !*sorted {
201 canonical_sort(buffer.as_mut_slice());
204 buffer.push((d, class));
208 unicode::char::decompose_canonical(ch, callback)
211 unicode::char::decompose_compatible(ch, callback)
222 canonical_sort(self.buffer.as_mut_slice());
226 if self.buffer.is_empty() {
229 match self.buffer.remove(0) {
239 fn size_hint(&self) -> (uint, Option<uint>) {
240 let (lower, _) = self.iter.size_hint();
246 enum RecompositionState {
252 /// External iterator for a string's recomposition's characters.
253 /// Use with the `std::iter` module.
255 pub struct Recompositions<'a> {
256 iter: Decompositions<'a>,
257 state: RecompositionState,
258 buffer: RingBuf<char>,
259 composee: Option<char>,
263 impl<'a> Iterator for Recompositions<'a> {
267 fn next(&mut self) -> Option<char> {
271 for ch in self.iter {
272 let ch_class = unicode::char::canonical_combining_class(ch);
273 if self.composee.is_none() {
277 self.composee = Some(ch);
280 let k = self.composee.clone().unwrap();
282 match self.last_ccc {
284 match unicode::char::compose(k, ch) {
286 self.composee = Some(r);
291 self.composee = Some(ch);
294 self.buffer.push_back(ch);
295 self.last_ccc = Some(ch_class);
300 if l_class >= ch_class {
301 // `ch` is blocked from `composee`
303 self.composee = Some(ch);
304 self.last_ccc = None;
305 self.state = Purging;
308 self.buffer.push_back(ch);
309 self.last_ccc = Some(ch_class);
312 match unicode::char::compose(k, ch) {
314 self.composee = Some(r);
318 self.buffer.push_back(ch);
319 self.last_ccc = Some(ch_class);
325 self.state = Finished;
326 if self.composee.is_some() {
327 return self.composee.take();
331 match self.buffer.pop_front() {
332 None => self.state = Composing,
337 match self.buffer.pop_front() {
338 None => return self.composee.take(),
347 /// External iterator for a string's UTF16 codeunits.
348 /// Use with the `std::iter` module.
350 pub struct Utf16Units<'a> {
351 encoder: Utf16Encoder<Chars<'a>>
354 impl<'a> Iterator for Utf16Units<'a> {
358 fn next(&mut self) -> Option<u16> { self.encoder.next() }
361 fn size_hint(&self) -> (uint, Option<uint>) { self.encoder.size_hint() }
368 // Return the initial codepoint accumulator for the first byte.
369 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
370 // for width 3, and 3 bits for width 4
371 macro_rules! utf8_first_byte {
372 ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
375 // return the value of $ch updated with continuation byte $byte
376 macro_rules! utf8_acc_cont_byte {
377 ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
380 #[unstable = "trait is unstable"]
381 impl BorrowFrom<String> for str {
382 fn borrow_from(owned: &String) -> &str { owned[] }
385 #[unstable = "trait is unstable"]
386 impl ToOwned<String> for str {
387 fn to_owned(&self) -> String {
389 String::from_utf8_unchecked(self.as_bytes().to_owned())
399 Section: Trait implementations
402 /// Any string that can be represented as a slice.
403 pub trait StrExt: ops::Slice<uint, str> {
404 /// Escapes each char in `self` with `char::escape_default`.
405 #[unstable = "return type may change to be an iterator"]
406 fn escape_default(&self) -> String {
407 self.chars().flat_map(|c| c.escape_default()).collect()
410 /// Escapes each char in `self` with `char::escape_unicode`.
411 #[unstable = "return type may change to be an iterator"]
412 fn escape_unicode(&self) -> String {
413 self.chars().flat_map(|c| c.escape_unicode()).collect()
416 /// Replaces all occurrences of one string with another.
420 /// * `from` - The string to replace
421 /// * `to` - The replacement string
425 /// The original string with all occurrences of `from` replaced with `to`.
430 /// let s = "Do you know the muffin man,
431 /// The muffin man, the muffin man, ...".to_string();
433 /// assert_eq!(s.replace("muffin man", "little lamb"),
434 /// "Do you know the little lamb,
435 /// The little lamb, the little lamb, ...".to_string());
437 /// // not found, so no change.
438 /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
441 fn replace(&self, from: &str, to: &str) -> String {
442 let mut result = String::new();
// Byte offset in `self` where the part not yet copied into `result` begins.
443 let mut last_end = 0;
444 for (start, end) in self.match_indices(from) {
// Copy the text lying between `last_end` and the start of this match.
445 result.push_str(unsafe { self.slice_unchecked(last_end, start) });
// NOTE(review): the lines that append `to` and advance `last_end` past the
// match are not visible in this excerpt — confirm.
// Copy the rest of `self`, from `last_end` up to its end.
449 result.push_str(unsafe { self.slice_unchecked(last_end, self.len()) });
453 /// Returns an iterator over the string in Unicode Normalization Form D
454 /// (canonical decomposition).
456 #[unstable = "this functionality may be moved to libunicode"]
457 fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
459 iter: self[].chars(),
466 /// Returns an iterator over the string in Unicode Normalization Form KD
467 /// (compatibility decomposition).
469 #[unstable = "this functionality may be moved to libunicode"]
470 fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
472 iter: self[].chars(),
479 /// An Iterator over the string in Unicode Normalization Form C
480 /// (canonical decomposition followed by canonical composition).
482 #[unstable = "this functionality may be moved to libunicode"]
483 fn nfc_chars<'a>(&'a self) -> Recompositions<'a> {
485 iter: self.nfd_chars(),
487 buffer: RingBuf::new(),
493 /// An Iterator over the string in Unicode Normalization Form KC
494 /// (compatibility decomposition followed by canonical composition).
496 #[unstable = "this functionality may be moved to libunicode"]
497 fn nfkc_chars<'a>(&'a self) -> Recompositions<'a> {
499 iter: self.nfkd_chars(),
501 buffer: RingBuf::new(),
507 /// Returns true if a string contains a string pattern.
511 /// - pat - The string pattern to look for
516 /// assert!("bananas".contains("nana"));
519 fn contains(&self, pat: &str) -> bool {
520 core_str::StrExt::contains(self[], pat)
523 /// Returns true if a string contains a char pattern.
527 /// - pat - The char pattern to look for
532 /// assert!("hello".contains_char('e'));
534 #[unstable = "might get removed in favour of a more generic contains()"]
535 fn contains_char<P: CharEq>(&self, pat: P) -> bool {
536 core_str::StrExt::contains_char(self[], pat)
539 /// An iterator over the characters of `self`. Note, this iterates
540 /// over Unicode code-points, not Unicode graphemes.
545 /// let v: Vec<char> = "abc åäö".chars().collect();
546 /// assert_eq!(v, vec!['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
549 fn chars(&self) -> Chars {
550 core_str::StrExt::chars(self[])
553 /// An iterator over the bytes of `self`
558 /// let v: Vec<u8> = "bors".bytes().collect();
559 /// assert_eq!(v, b"bors".to_vec());
562 fn bytes(&self) -> Bytes {
563 core_str::StrExt::bytes(self[])
566 /// An iterator over the characters of `self` and their byte offsets.
568 fn char_indices(&self) -> CharIndices {
569 core_str::StrExt::char_indices(self[])
572 /// An iterator over substrings of `self`, separated by characters
573 /// matched by the pattern `pat`.
578 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect();
579 /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
581 /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).collect();
582 /// assert_eq!(v, vec!["abc", "def", "ghi"]);
584 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
585 /// assert_eq!(v, vec!["lion", "", "tiger", "leopard"]);
587 /// let v: Vec<&str> = "".split('X').collect();
588 /// assert_eq!(v, vec![""]);
591 fn split<P: CharEq>(&self, pat: P) -> Split<P> {
592 core_str::StrExt::split(self[], pat)
595 /// An iterator over substrings of `self`, separated by characters
596 /// matched by the pattern `pat`, restricted to splitting at most `count`
602 /// let v: Vec<&str> = "Mary had a little lambda".splitn(2, ' ').collect();
603 /// assert_eq!(v, vec!["Mary", "had", "a little lambda"]);
605 /// let v: Vec<&str> = "abc1def2ghi".splitn(1, |&: c: char| c.is_numeric()).collect();
606 /// assert_eq!(v, vec!["abc", "def2ghi"]);
608 /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(2, 'X').collect();
609 /// assert_eq!(v, vec!["lion", "", "tigerXleopard"]);
611 /// let v: Vec<&str> = "abcXdef".splitn(0, 'X').collect();
612 /// assert_eq!(v, vec!["abcXdef"]);
614 /// let v: Vec<&str> = "".splitn(1, 'X').collect();
615 /// assert_eq!(v, vec![""]);
618 fn splitn<P: CharEq>(&self, count: uint, pat: P) -> SplitN<P> {
619 core_str::StrExt::splitn(self[], count, pat)
622 /// An iterator over substrings of `self`, separated by characters
623 /// matched by the pattern `pat`.
625 /// Equivalent to `split`, except that the trailing substring
626 /// is skipped if empty (terminator semantics).
631 /// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
632 /// assert_eq!(v, vec!["A", "B"]);
634 /// let v: Vec<&str> = "A..B..".split_terminator('.').collect();
635 /// assert_eq!(v, vec!["A", "", "B", ""]);
637 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').rev().collect();
638 /// assert_eq!(v, vec!["lamb", "little", "a", "had", "Mary"]);
640 /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).rev().collect();
641 /// assert_eq!(v, vec!["ghi", "def", "abc"]);
643 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').rev().collect();
644 /// assert_eq!(v, vec!["leopard", "tiger", "", "lion"]);
646 #[unstable = "might get removed"]
647 fn split_terminator<P: CharEq>(&self, pat: P) -> SplitTerminator<P> {
648 core_str::StrExt::split_terminator(self[], pat)
651 /// An iterator over substrings of `self`, separated by characters
652 /// matched by the pattern `pat`, starting from the end of the string.
653 /// Restricted to splitting at most `count` times.
658 /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(2, ' ').collect();
659 /// assert_eq!(v, vec!["lamb", "little", "Mary had a"]);
661 /// let v: Vec<&str> = "abc1def2ghi".rsplitn(1, |&: c: char| c.is_numeric()).collect();
662 /// assert_eq!(v, vec!["ghi", "abc1def"]);
664 /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(2, 'X').collect();
665 /// assert_eq!(v, vec!["leopard", "tiger", "lionX"]);
668 fn rsplitn<P: CharEq>(&self, count: uint, pat: P) -> RSplitN<P> {
669 core_str::StrExt::rsplitn(self[], count, pat)
672 /// An iterator over the start and end indices of the disjoint
673 /// matches of the pattern `pat` within `self`.
675 /// That is, each returned value `(start, end)` satisfies
676 /// `self.slice(start, end) == pat`. For matches of `pat` within
677 /// `self` that overlap, only the indices corresponding to the
678 /// first match are returned.
683 /// let v: Vec<(uint, uint)> = "abcXXXabcYYYabc".match_indices("abc").collect();
684 /// assert_eq!(v, vec![(0,3), (6,9), (12,15)]);
686 /// let v: Vec<(uint, uint)> = "1abcabc2".match_indices("abc").collect();
687 /// assert_eq!(v, vec![(1,4), (4,7)]);
689 /// let v: Vec<(uint, uint)> = "ababa".match_indices("aba").collect();
690 /// assert_eq!(v, vec![(0, 3)]); // only the first `aba`
692 #[unstable = "might have its iterator type changed"]
693 fn match_indices<'a>(&'a self, pat: &'a str) -> MatchIndices<'a> {
694 core_str::StrExt::match_indices(self[], pat)
697 /// An iterator over the substrings of `self` separated by the pattern `pat`.
702 /// let v: Vec<&str> = "abcXXXabcYYYabc".split_str("abc").collect();
703 /// assert_eq!(v, vec!["", "XXX", "YYY", ""]);
705 /// let v: Vec<&str> = "1abcabc2".split_str("abc").collect();
706 /// assert_eq!(v, vec!["1", "", "2"]);
708 #[unstable = "might get removed in the future in favor of a more generic split()"]
709 fn split_str<'a>(&'a self, pat: &'a str) -> SplitStr<'a> {
710 core_str::StrExt::split_str(self[], pat)
713 /// An iterator over the lines of a string (subsequences separated
714 /// by `\n`). This does not include the empty string after a
720 /// let four_lines = "foo\nbar\n\nbaz\n";
721 /// let v: Vec<&str> = four_lines.lines().collect();
722 /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
725 fn lines(&self) -> Lines {
726 core_str::StrExt::lines(self[])
729 /// An iterator over the lines of a string, separated by either
730 /// `\n` or `\r\n`. As with `.lines()`, this does not include an
731 /// empty trailing line.
736 /// let four_lines = "foo\r\nbar\n\r\nbaz\n";
737 /// let v: Vec<&str> = four_lines.lines_any().collect();
738 /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
741 fn lines_any(&self) -> LinesAny {
742 core_str::StrExt::lines_any(self[])
745 /// Returns a slice of the given string from the byte range
746 /// [`begin`..`end`).
748 /// This operation is `O(1)`.
750 /// Panics when `begin` and `end` do not point to valid characters
751 /// or point beyond the last character of the string.
753 /// See also `slice_to` and `slice_from` for slicing prefixes and
754 /// suffixes of strings, and `slice_chars` for slicing based on
755 /// code point counts.
760 /// let s = "Löwe 老虎 Léopard";
761 /// assert_eq!(s.slice(0, 1), "L");
763 /// assert_eq!(s.slice(1, 9), "öwe 老");
765 /// // these will panic:
766 /// // byte 2 lies within `ö`:
767 /// // s.slice(2, 3);
769 /// // byte 8 lies within `老`
770 /// // s.slice(1, 8);
772 /// // byte 100 is outside the string
773 /// // s.slice(3, 100);
775 #[unstable = "use slice notation [a..b] instead"]
776 fn slice(&self, begin: uint, end: uint) -> &str {
777 core_str::StrExt::slice(self[], begin, end)
780 /// Returns a slice of the string from `begin` to its end.
782 /// Equivalent to `self.slice(begin, self.len())`.
784 /// Panics when `begin` does not point to a valid character, or is
787 /// See also `slice`, `slice_to` and `slice_chars`.
788 #[unstable = "use slice notation [a..] instead"]
789 fn slice_from(&self, begin: uint) -> &str {
790 core_str::StrExt::slice_from(self[], begin)
793 /// Returns a slice of the string from the beginning to byte
796 /// Equivalent to `self.slice(0, end)`.
798 /// Panics when `end` does not point to a valid character, or is
801 /// See also `slice`, `slice_from` and `slice_chars`.
802 #[unstable = "use slice notation [0..a] instead"]
803 fn slice_to(&self, end: uint) -> &str {
804 core_str::StrExt::slice_to(self[], end)
807 /// Returns a slice of the string from the character range
808 /// [`begin`..`end`).
810 /// That is, start at the `begin`-th code point of the string and
811 /// continue to the `end`-th code point. This does not detect or
812 /// handle edge cases such as leaving a combining character as the
813 /// first code point of the string.
815 /// Due to the design of UTF-8, this operation is `O(end)`.
816 /// See `slice`, `slice_to` and `slice_from` for `O(1)`
817 /// variants that use byte indices rather than code point
820 /// Panics if `begin` > `end` or if either `begin` or `end` is
821 /// beyond the last character of the string.
826 /// let s = "Löwe 老虎 Léopard";
827 /// assert_eq!(s.slice_chars(0, 4), "Löwe");
828 /// assert_eq!(s.slice_chars(5, 7), "老虎");
830 #[unstable = "may have yet to prove its worth"]
831 fn slice_chars(&self, begin: uint, end: uint) -> &str {
832 core_str::StrExt::slice_chars(self[], begin, end)
835 /// Takes a bytewise (not UTF-8) slice from a string.
837 /// Returns the substring from [`begin`..`end`).
839 /// Caller must check both UTF-8 character boundaries and the boundaries of
840 /// the entire slice as well.
842 unsafe fn slice_unchecked(&self, begin: uint, end: uint) -> &str {
843 core_str::StrExt::slice_unchecked(self[], begin, end)
846 /// Returns true if the pattern `pat` is a prefix of the string.
851 /// assert!("banana".starts_with("ba"));
854 fn starts_with(&self, pat: &str) -> bool {
855 core_str::StrExt::starts_with(self[], pat)
858 /// Returns true if the pattern `pat` is a suffix of the string.
863 /// assert!("banana".ends_with("nana"));
866 fn ends_with(&self, pat: &str) -> bool {
867 core_str::StrExt::ends_with(self[], pat)
870 /// Returns a string with all pre- and suffixes that match
871 /// the pattern `pat` repeatedly removed.
875 /// * pat - a string pattern
880 /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
881 /// let x: &[_] = &['1', '2'];
882 /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar");
883 /// assert_eq!("123foo1bar123".trim_matches(|&: c: char| c.is_numeric()), "foo1bar");
886 fn trim_matches<P: CharEq>(&self, pat: P) -> &str {
887 core_str::StrExt::trim_matches(self[], pat)
890 /// Returns a string with all prefixes that match
891 /// the pattern `pat` repeatedly removed.
895 /// * pat - a string pattern
900 /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
901 /// let x: &[_] = &['1', '2'];
902 /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12");
903 /// assert_eq!("123foo1bar123".trim_left_matches(|&: c: char| c.is_numeric()), "foo1bar123");
906 fn trim_left_matches<P: CharEq>(&self, pat: P) -> &str {
907 core_str::StrExt::trim_left_matches(self[], pat)
910 /// Returns a string with all suffixes that match
911 /// the pattern `pat` repeatedly removed.
915 /// * pat - a string pattern
920 /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
921 /// let x: &[_] = &['1', '2'];
922 /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar");
923 /// assert_eq!("123foo1bar123".trim_right_matches(|&: c: char| c.is_numeric()), "123foo1bar");
926 fn trim_right_matches<P: CharEq>(&self, pat: P) -> &str {
927 core_str::StrExt::trim_right_matches(self[], pat)
930 /// Check that `index`-th byte lies at the start and/or end of a
931 /// UTF-8 code point sequence.
933 /// The start and end of the string (when `index == self.len()`)
934 /// are considered to be boundaries.
936 /// Panics if `index` is greater than `self.len()`.
941 /// let s = "Löwe 老虎 Léopard";
942 /// assert!(s.is_char_boundary(0));
944 /// assert!(s.is_char_boundary(6));
945 /// assert!(s.is_char_boundary(s.len()));
947 /// // second byte of `ö`
948 /// assert!(!s.is_char_boundary(2));
950 /// // third byte of `老`
951 /// assert!(!s.is_char_boundary(8));
953 #[unstable = "naming is uncertain with container conventions"]
954 fn is_char_boundary(&self, index: uint) -> bool {
955 core_str::StrExt::is_char_boundary(self[], index)
958 /// Pluck a character out of a string and return the index of the next
961 /// This function can be used to iterate over the Unicode characters of a
966 /// This example manually iterates through the characters of a
967 /// string; this should normally be done by `.chars()` or
971 /// use std::str::CharRange;
973 /// let s = "中华Việt Nam";
975 /// while i < s.len() {
976 /// let CharRange {ch, next} = s.char_range_at(i);
977 /// println!("{}: {}", i, ch);
1000 /// * start - The byte offset of the char to extract
1004 /// A record {ch: char, next: uint} containing the char value and the byte
1005 /// index of the next Unicode character.
1009 /// If `start` is greater than or equal to the length of the string.
1010 /// If `start` is not the index of the beginning of a valid UTF-8 character.
1011 #[unstable = "naming is uncertain with container conventions"]
1012 fn char_range_at(&self, start: uint) -> CharRange {
1013 core_str::StrExt::char_range_at(self[], start)
1016 /// Given a byte position and a str, return the previous char and its position.
1018 /// This function can be used to iterate over a Unicode string in reverse.
1020 /// Returns 0 for next index if called on start index 0.
1024 /// If `start` is greater than the length of the string.
1025 /// If `start` is not an index following a valid UTF-8 character.
1026 #[unstable = "naming is uncertain with container conventions"]
1027 fn char_range_at_reverse(&self, start: uint) -> CharRange {
1028 core_str::StrExt::char_range_at_reverse(self[], start)
1031 /// Plucks the character starting at the `i`th byte of a string.
1037 /// assert_eq!(s.char_at(1), 'b');
1038 /// assert_eq!(s.char_at(2), 'π');
1039 /// assert_eq!(s.char_at(4), 'c');
1044 /// If `i` is greater than or equal to the length of the string.
1045 /// If `i` is not the index of the beginning of a valid UTF-8 character.
1046 #[unstable = "naming is uncertain with container conventions"]
1047 fn char_at(&self, i: uint) -> char {
1048 core_str::StrExt::char_at(self[], i)
1051 /// Plucks the character ending at the `i`th byte of a string.
1055 /// If `i` is greater than the length of the string.
1056 /// If `i` is not an index following a valid UTF-8 character.
1057 #[unstable = "naming is uncertain with container conventions"]
1058 fn char_at_reverse(&self, i: uint) -> char {
1059 core_str::StrExt::char_at_reverse(self[], i)
1062 /// Work with the byte buffer of a string as a byte slice.
1067 /// assert_eq!("bors".as_bytes(), b"bors");
1070 fn as_bytes(&self) -> &[u8] {
1071 core_str::StrExt::as_bytes(self[])
1074 /// Returns the byte index of the first character of `self` that
1075 /// matches the pattern `pat`.
1079 /// `Some` containing the byte index of the first matching character
1080 /// or `None` if there is no match
1085 /// let s = "Löwe 老虎 Léopard";
1087 /// assert_eq!(s.find('L'), Some(0));
1088 /// assert_eq!(s.find('é'), Some(14));
1090 /// // the first space
1091 /// assert_eq!(s.find(|&: c: char| c.is_whitespace()), Some(5));
1093 /// // neither are found
1094 /// let x: &[_] = &['1', '2'];
1095 /// assert_eq!(s.find(x), None);
1098 fn find<P: CharEq>(&self, pat: P) -> Option<uint> {
1099 core_str::StrExt::find(self[], pat)
1102 /// Returns the byte index of the last character of `self` that
1103 /// matches the pattern `pat`.
1107 /// `Some` containing the byte index of the last matching character
1108 /// or `None` if there is no match.
1113 /// let s = "Löwe 老虎 Léopard";
1115 /// assert_eq!(s.rfind('L'), Some(13));
1116 /// assert_eq!(s.rfind('é'), Some(14));
1118 /// // the second space
1119 /// assert_eq!(s.rfind(|&: c: char| c.is_whitespace()), Some(12));
1121 /// // searches for an occurrence of either `1` or `2`, but neither are found
1122 /// let x: &[_] = &['1', '2'];
1123 /// assert_eq!(s.rfind(x), None);
1126 fn rfind<P: CharEq>(&self, pat: P) -> Option<uint> {
1127 core_str::StrExt::rfind(self[], pat)
1130 /// Returns the byte index of the first matching substring
1134 /// * `needle` - The string to search for
1138 /// `Some` containing the byte index of the first matching substring
1139 /// or `None` if there is no match.
1144 /// let s = "Löwe 老虎 Léopard";
1146 /// assert_eq!(s.find_str("老虎 L"), Some(6));
1147 /// assert_eq!(s.find_str("muffin man"), None);
1149 #[unstable = "might get removed in favor of a more generic find in the future"]
1150 fn find_str(&self, needle: &str) -> Option<uint> {
1151 core_str::StrExt::find_str(self[], needle)
1154 /// Retrieves the first character from a string slice and returns
1155 /// it. This does not allocate a new string; instead, it returns a
1156 /// slice that points one character beyond the character that was
1157 /// shifted. If the string does not contain any characters,
1158 /// None is returned instead.
1163 /// let s = "Löwe 老虎 Léopard";
1164 /// let (c, s1) = s.slice_shift_char().unwrap();
1165 /// assert_eq!(c, 'L');
1166 /// assert_eq!(s1, "öwe 老虎 Léopard");
1168 /// let (c, s2) = s1.slice_shift_char().unwrap();
1169 /// assert_eq!(c, 'ö');
1170 /// assert_eq!(s2, "we 老虎 Léopard");
1172 #[unstable = "awaiting conventions about shifting and slices"]
1173 fn slice_shift_char(&self) -> Option<(char, &str)> {
1174 core_str::StrExt::slice_shift_char(self[])
1177 /// Returns the byte offset of an inner slice relative to an enclosing outer slice.
1179 /// Panics if `inner` is not a direct slice contained within self.
1184 /// let string = "a\nb\nc";
1185 /// let lines: Vec<&str> = string.lines().collect();
1187 /// assert!(string.subslice_offset(lines[0]) == 0); // &"a"
1188 /// assert!(string.subslice_offset(lines[1]) == 2); // &"b"
1189 /// assert!(string.subslice_offset(lines[2]) == 4); // &"c"
1191 #[unstable = "awaiting convention about comparability of arbitrary slices"]
1192 fn subslice_offset(&self, inner: &str) -> uint {
1193 core_str::StrExt::subslice_offset(self[], inner)
1196 /// Return an unsafe pointer to the string's buffer.
1198 /// The caller must ensure that the string outlives this pointer,
1199 /// and that it is not reallocated (e.g. by pushing to the
1203 fn as_ptr(&self) -> *const u8 {
1204 core_str::StrExt::as_ptr(self[])
1207 /// Return an iterator of `u16` over the string encoded as UTF-16.
1208 #[unstable = "this functionality may only be provided by libunicode"]
1209 fn utf16_units(&self) -> Utf16Units {
1210 Utf16Units { encoder: Utf16Encoder::new(self[].chars()) }
1213 /// Return the number of bytes in this string
1218 /// assert_eq!("foo".len(), 3);
1219 /// assert_eq!("ƒoo".len(), 4);
1223 fn len(&self) -> uint {
1224 core_str::StrExt::len(self[])
1227 /// Returns true if this slice contains no bytes
1232 /// assert!("".is_empty());
1236 fn is_empty(&self) -> bool {
1237 core_str::StrExt::is_empty(self[])
1240 /// Parse this string into the specified type.
1245 /// assert_eq!("4".parse::<u32>(), Some(4));
1246 /// assert_eq!("j".parse::<u32>(), None);
1249 #[unstable = "this method was just created"]
1250 fn parse<F: FromStr>(&self) -> Option<F> {
1251 core_str::StrExt::parse(self[])
1254 /// Returns an iterator over the
1255 /// [grapheme clusters](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
1258 /// If `is_extended` is true, the iterator is over the *extended grapheme clusters*;
1259 /// otherwise, the iterator is over the *legacy grapheme clusters*.
1260 /// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
1261 /// recommends extended grapheme cluster boundaries for general processing.
1266 /// let gr1 = "a\u{310}e\u{301}o\u{308}\u{332}".graphemes(true).collect::<Vec<&str>>();
1267 /// let b: &[_] = &["a\u{310}", "e\u{301}", "o\u{308}\u{332}"];
1268 /// assert_eq!(gr1.as_slice(), b);
1269 /// let gr2 = "a\r\nb🇷🇺🇸🇹".graphemes(true).collect::<Vec<&str>>();
1270 /// let b: &[_] = &["a", "\r\n", "b", "🇷🇺🇸🇹"];
1271 /// assert_eq!(gr2.as_slice(), b);
1273 #[unstable = "this functionality may only be provided by libunicode"]
1274 fn graphemes(&self, is_extended: bool) -> Graphemes {
1275 UnicodeStr::graphemes(self[], is_extended)
1278 /// Returns an iterator over the grapheme clusters of self and their byte offsets.
1279 /// See `graphemes()` method for more information.
1284 /// let gr_inds = "a̐éö̲\r\n".grapheme_indices(true).collect::<Vec<(uint, &str)>>();
1285 /// let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
1286 /// assert_eq!(gr_inds.as_slice(), b);
1288 #[unstable = "this functionality may only be provided by libunicode"]
1289 fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices {
1290 UnicodeStr::grapheme_indices(self[], is_extended)
1293 /// An iterator over the words of a string (subsequences separated
1294 /// by any sequence of whitespace). Sequences of whitespace are
1295 /// collapsed, so empty "words" are not included.
1300 /// let some_words = " Mary had\ta little \n\t lamb";
1301 /// let v: Vec<&str> = some_words.words().collect();
1302 /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
1305 fn words(&self) -> Words {
1306 UnicodeStr::words(self[])
1309 /// Returns a string's displayed width in columns, treating control
1310 /// characters as zero-width.
1312 /// `is_cjk` determines behavior for characters in the Ambiguous category:
1313 /// if `is_cjk` is `true`, these are 2 columns wide; otherwise, they are 1.
1314 /// In CJK locales, `is_cjk` should be `true`, else it should be `false`.
1315 /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
1316 /// recommends that these characters be treated as 1 column (i.e.,
1317 /// `is_cjk` = `false`) if the locale is unknown.
1318 #[unstable = "this functionality may only be provided by libunicode"]
1319 fn width(&self, is_cjk: bool) -> uint {
1320 UnicodeStr::width(self[], is_cjk)
1323 /// Returns a string with leading and trailing whitespace removed.
1325 fn trim(&self) -> &str {
1326 UnicodeStr::trim(self[])
1329 /// Returns a string with leading whitespace removed.
1331 fn trim_left(&self) -> &str {
1332 UnicodeStr::trim_left(self[])
1335 /// Returns a string with trailing whitespace removed.
1337 fn trim_right(&self) -> &str {
1338 UnicodeStr::trim_right(self[])
1342 impl StrExt for str {}
1348 use core::iter::AdditiveIterator;
1349 use super::from_utf8;
1350 use super::Utf8Error;
1355 assert!("" <= "foo");
1356 assert!("foo" <= "foo");
1357 assert!("foo" != "bar");
1362 assert_eq!("".len(), 0u);
1363 assert_eq!("hello world".len(), 11u);
1364 assert_eq!("\x63".len(), 1u);
1365 assert_eq!("\u{a2}".len(), 2u);
1366 assert_eq!("\u{3c0}".len(), 2u);
1367 assert_eq!("\u{2620}".len(), 3u);
1368 assert_eq!("\u{1d11e}".len(), 4u);
1370 assert_eq!("".chars().count(), 0u);
1371 assert_eq!("hello world".chars().count(), 11u);
1372 assert_eq!("\x63".chars().count(), 1u);
1373 assert_eq!("\u{a2}".chars().count(), 1u);
1374 assert_eq!("\u{3c0}".chars().count(), 1u);
1375 assert_eq!("\u{2620}".chars().count(), 1u);
1376 assert_eq!("\u{1d11e}".chars().count(), 1u);
1377 assert_eq!("ประเทศไทย中华Việt Nam".chars().count(), 19u);
1379 assert_eq!("hello".width(false), 10u);
1380 assert_eq!("hello".width(true), 10u);
1381 assert_eq!("\0\0\0\0\0".width(false), 0u);
1382 assert_eq!("\0\0\0\0\0".width(true), 0u);
1383 assert_eq!("".width(false), 0u);
1384 assert_eq!("".width(true), 0u);
1385 assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(false), 4u);
1386 assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(true), 8u);
1391 assert_eq!("hello".find('l'), Some(2u));
1392 assert_eq!("hello".find(|&: c:char| c == 'o'), Some(4u));
1393 assert!("hello".find('x').is_none());
1394 assert!("hello".find(|&: c:char| c == 'x').is_none());
1395 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
1396 assert_eq!("ประเทศไทย中华Việt Nam".find(|&: c: char| c == '华'), Some(30u));
1401 assert_eq!("hello".rfind('l'), Some(3u));
1402 assert_eq!("hello".rfind(|&: c:char| c == 'o'), Some(4u));
1403 assert!("hello".rfind('x').is_none());
1404 assert!("hello".rfind(|&: c:char| c == 'x').is_none());
1405 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
1406 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|&: c: char| c == '华'), Some(30u));
1411 let empty = String::from_str("");
1412 let s: String = empty.chars().collect();
1413 assert_eq!(empty, s);
1414 let data = String::from_str("ประเทศไทย中");
1415 let s: String = data.chars().collect();
1416 assert_eq!(data, s);
1420 fn test_into_bytes() {
1421 let data = String::from_str("asdf");
1422 let buf = data.into_bytes();
1423 assert_eq!(b"asdf", buf);
1427 fn test_find_str() {
1429 assert_eq!("".find_str(""), Some(0u));
1430 assert!("banana".find_str("apple pie").is_none());
1432 let data = "abcabc";
1433 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
1434 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
1435 assert!(data.slice(2u, 4u).find_str("ab").is_none());
1437 let string = "ประเทศไทย中华Việt Nam";
1438 let mut data = String::from_str(string);
1439 data.push_str(string);
1440 assert!(data.find_str("ไท华").is_none());
1441 assert_eq!(data.slice(0u, 43u).find_str(""), Some(0u));
1442 assert_eq!(data.slice(6u, 43u).find_str(""), Some(6u - 6u));
1444 assert_eq!(data.slice(0u, 43u).find_str("ประ"), Some( 0u));
1445 assert_eq!(data.slice(0u, 43u).find_str("ทศไ"), Some(12u));
1446 assert_eq!(data.slice(0u, 43u).find_str("ย中"), Some(24u));
1447 assert_eq!(data.slice(0u, 43u).find_str("iệt"), Some(34u));
1448 assert_eq!(data.slice(0u, 43u).find_str("Nam"), Some(40u));
1450 assert_eq!(data.slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
1451 assert_eq!(data.slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
1452 assert_eq!(data.slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
1453 assert_eq!(data.slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
1454 assert_eq!(data.slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
1458 fn test_slice_chars() {
1459 fn t(a: &str, b: &str, start: uint) {
1460 assert_eq!(a.slice_chars(start, start + b.chars().count()), b);
1463 t("hello", "llo", 2);
1464 t("hello", "el", 1);
1467 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
1470 fn s(x: &str) -> String { x.to_string() }
1472 macro_rules! test_concat {
1473 ($expected: expr, $string: expr) => {
1475 let s: String = $string.concat();
1476 assert_eq!($expected, s);
1482 fn test_concat_for_different_types() {
1483 test_concat!("ab", vec![s("a"), s("b")]);
1484 test_concat!("ab", vec!["a", "b"]);
1485 test_concat!("ab", vec!["a", "b"].as_slice());
1486 test_concat!("ab", vec![s("a"), s("b")]);
1490 fn test_concat_for_different_lengths() {
1491 let empty: &[&str] = &[];
1492 test_concat!("", empty);
1493 test_concat!("a", ["a"]);
1494 test_concat!("ab", ["a", "b"]);
1495 test_concat!("abc", ["", "a", "bc"]);
1498 macro_rules! test_connect {
1499 ($expected: expr, $string: expr, $delim: expr) => {
1501 let s = $string.connect($delim);
1502 assert_eq!($expected, s);
1508 fn test_connect_for_different_types() {
1509 test_connect!("a-b", ["a", "b"], "-");
1510 let hyphen = "-".to_string();
1511 test_connect!("a-b", [s("a"), s("b")], hyphen.as_slice());
1512 test_connect!("a-b", vec!["a", "b"], hyphen.as_slice());
1513 test_connect!("a-b", vec!["a", "b"].as_slice(), "-");
1514 test_connect!("a-b", vec![s("a"), s("b")], "-");
1518 fn test_connect_for_different_lengths() {
1519 let empty: &[&str] = &[];
1520 test_connect!("", empty, "-");
1521 test_connect!("a", ["a"], "-");
1522 test_connect!("a-b", ["a", "b"], "-");
1523 test_connect!("-a-bc", ["", "a", "bc"], "-");
1527 fn test_unsafe_slice() {
1528 assert_eq!("ab", unsafe {"abc".slice_unchecked(0, 2)});
1529 assert_eq!("bc", unsafe {"abc".slice_unchecked(1, 3)});
1530 assert_eq!("", unsafe {"abc".slice_unchecked(1, 1)});
1531 fn a_million_letter_a() -> String {
1533 let mut rs = String::new();
1535 rs.push_str("aaaaaaaaaa");
1540 fn half_a_million_letter_a() -> String {
1542 let mut rs = String::new();
1544 rs.push_str("aaaaa");
1549 let letters = a_million_letter_a();
1550 assert!(half_a_million_letter_a() ==
1551 unsafe {String::from_str(letters.slice_unchecked(
fn test_starts_with() {
    // (haystack, prefix, expected result of `haystack.starts_with(prefix)`)
    let cases = [
        ("", "", true),
        ("abc", "", true),
        ("abc", "a", true),
        ("a", "abc", false),
        ("", "abc", false),
        ("ödd", "-", false),
        ("ödd", "öd", true),
    ];
    for &(haystack, prefix, expected) in cases.iter() {
        assert_eq!(haystack.starts_with(prefix), expected);
    }
}
fn test_ends_with() {
    // (haystack, suffix, expected result of `haystack.ends_with(suffix)`)
    let cases = [
        ("", "", true),
        ("abc", "", true),
        ("abc", "c", true),
        ("a", "abc", false),
        ("", "abc", false),
        ("ddö", "-", false),
        ("ddö", "dö", true),
    ];
    for &(haystack, suffix, expected) in cases.iter() {
        assert_eq!(haystack.ends_with(suffix), expected);
    }
}
fn test_is_empty() {
    // Only the zero-length string counts as empty.
    assert_eq!("".is_empty(), true);
    assert_eq!("a".is_empty(), false);
}
1587 assert_eq!("".replace(a, "b"), String::from_str(""));
1588 assert_eq!("a".replace(a, "b"), String::from_str("b"));
1589 assert_eq!("ab".replace(a, "b"), String::from_str("bb"));
1591 assert!(" test test ".replace(test, "toast") ==
1592 String::from_str(" toast toast "));
1593 assert_eq!(" test test ".replace(test, ""), String::from_str(" "));
1597 fn test_replace_2a() {
1598 let data = "ประเทศไทย中华";
1599 let repl = "دولة الكويت";
1602 let a2 = "دولة الكويتทศไทย中华";
1603 assert_eq!(data.replace(a, repl), a2);
1607 fn test_replace_2b() {
1608 let data = "ประเทศไทย中华";
1609 let repl = "دولة الكويت";
1612 let b2 = "ปรدولة الكويتทศไทย中华";
1613 assert_eq!(data.replace(b, repl), b2);
1617 fn test_replace_2c() {
1618 let data = "ประเทศไทย中华";
1619 let repl = "دولة الكويت";
1622 let c2 = "ประเทศไทยدولة الكويت";
1623 assert_eq!(data.replace(c, repl), c2);
1627 fn test_replace_2d() {
1628 let data = "ประเทศไทย中华";
1629 let repl = "دولة الكويت";
1632 assert_eq!(data.replace(d, repl), data);
1637 assert_eq!("ab", "abc".slice(0, 2));
1638 assert_eq!("bc", "abc".slice(1, 3));
1639 assert_eq!("", "abc".slice(1, 1));
1640 assert_eq!("\u{65e5}", "\u{65e5}\u{672c}".slice(0, 3));
1642 let data = "ประเทศไทย中华";
1643 assert_eq!("ป", data.slice(0, 3));
1644 assert_eq!("ร", data.slice(3, 6));
1645 assert_eq!("", data.slice(3, 3));
1646 assert_eq!("华", data.slice(30, 33));
1648 fn a_million_letter_x() -> String {
1650 let mut rs = String::new();
1652 rs.push_str("华华华华华华华华华华");
1657 fn half_a_million_letter_x() -> String {
1659 let mut rs = String::new();
1661 rs.push_str("华华华华华");
1666 let letters = a_million_letter_x();
1667 assert!(half_a_million_letter_x() ==
1668 String::from_str(letters.slice(0u, 3u * 500000u)));
1673 let ss = "中华Việt Nam";
1675 assert_eq!("华", ss.slice(3u, 6u));
1676 assert_eq!("Việt Nam", ss.slice(6u, 16u));
1678 assert_eq!("ab", "abc".slice(0u, 2u));
1679 assert_eq!("bc", "abc".slice(1u, 3u));
1680 assert_eq!("", "abc".slice(1u, 1u));
1682 assert_eq!("中", ss.slice(0u, 3u));
1683 assert_eq!("华V", ss.slice(3u, 7u));
1684 assert_eq!("", ss.slice(3u, 3u));
1699 fn test_slice_fail() {
1700 "中华Việt Nam".slice(0u, 2u);
1704 fn test_slice_from() {
1705 assert_eq!("abcd".slice_from(0), "abcd");
1706 assert_eq!("abcd".slice_from(2), "cd");
1707 assert_eq!("abcd".slice_from(4), "");
1710 fn test_slice_to() {
1711 assert_eq!("abcd".slice_to(0), "");
1712 assert_eq!("abcd".slice_to(2), "ab");
1713 assert_eq!("abcd".slice_to(4), "abcd");
1717 fn test_trim_left_matches() {
1718 let v: &[char] = &[];
1719 assert_eq!(" *** foo *** ".trim_left_matches(v), " *** foo *** ");
1720 let chars: &[char] = &['*', ' '];
1721 assert_eq!(" *** foo *** ".trim_left_matches(chars), "foo *** ");
1722 assert_eq!(" *** *** ".trim_left_matches(chars), "");
1723 assert_eq!("foo *** ".trim_left_matches(chars), "foo *** ");
1725 assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
1726 let chars: &[char] = &['1', '2'];
1727 assert_eq!("12foo1bar12".trim_left_matches(chars), "foo1bar12");
1728 assert_eq!("123foo1bar123".trim_left_matches(|&: c: char| c.is_numeric()), "foo1bar123");
1732 fn test_trim_right_matches() {
1733 let v: &[char] = &[];
1734 assert_eq!(" *** foo *** ".trim_right_matches(v), " *** foo *** ");
1735 let chars: &[char] = &['*', ' '];
1736 assert_eq!(" *** foo *** ".trim_right_matches(chars), " *** foo");
1737 assert_eq!(" *** *** ".trim_right_matches(chars), "");
1738 assert_eq!(" *** foo".trim_right_matches(chars), " *** foo");
1740 assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
1741 let chars: &[char] = &['1', '2'];
1742 assert_eq!("12foo1bar12".trim_right_matches(chars), "12foo1bar");
1743 assert_eq!("123foo1bar123".trim_right_matches(|&: c: char| c.is_numeric()), "123foo1bar");
1747 fn test_trim_matches() {
1748 let v: &[char] = &[];
1749 assert_eq!(" *** foo *** ".trim_matches(v), " *** foo *** ");
1750 let chars: &[char] = &['*', ' '];
1751 assert_eq!(" *** foo *** ".trim_matches(chars), "foo");
1752 assert_eq!(" *** *** ".trim_matches(chars), "");
1753 assert_eq!("foo".trim_matches(chars), "foo");
1755 assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
1756 let chars: &[char] = &['1', '2'];
1757 assert_eq!("12foo1bar12".trim_matches(chars), "foo1bar");
1758 assert_eq!("123foo1bar123".trim_matches(|&: c: char| c.is_numeric()), "foo1bar");
fn test_trim_left() {
    // (input, expected): only leading whitespace is removed; trailing
    // whitespace must survive.
    let cases = [
        ("", ""),
        ("a", "a"),
        (" ", ""),
        (" blah", "blah"),
        (" \u{3000} wut", "wut"),
        ("hey ", "hey "),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(input.trim_left(), expected);
    }
}
fn test_trim_right() {
    // (input, expected): only trailing whitespace is removed; leading
    // whitespace must survive.
    let cases = [
        ("", ""),
        ("a", "a"),
        (" ", ""),
        ("blah ", "blah"),
        ("wut \u{3000} ", "wut"),
        (" hey", " hey"),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(input.trim_right(), expected);
    }
}
1783 assert_eq!("".trim(), "");
1784 assert_eq!("a".trim(), "a");
1785 assert_eq!(" ".trim(), "");
1786 assert_eq!(" blah ".trim(), "blah");
1787 assert_eq!("\nwut \u{3000} ".trim(), "wut");
1788 assert_eq!(" hey dude ".trim(), "hey dude");
fn test_is_whitespace() {
    // True when every scalar value of `s` is whitespace (vacuously true
    // for the empty string).
    fn all_whitespace(s: &str) -> bool {
        s.chars().all(|c| c.is_whitespace())
    }

    assert!(all_whitespace(""));
    assert!(all_whitespace(" "));
    assert!(all_whitespace("\u{2009}")); // Thin space
    assert!(all_whitespace(" \n\t "));
    assert!(!all_whitespace(" _ "));
}
1801 fn test_slice_shift_char() {
1802 let data = "ประเทศไทย中";
1803 assert_eq!(data.slice_shift_char(), Some(('ป', "ระเทศไทย中")));
1807 fn test_slice_shift_char_2() {
1809 assert_eq!(empty.slice_shift_char(), None);
1814 // deny overlong encodings
1815 assert!(from_utf8(&[0xc0, 0x80]).is_err());
1816 assert!(from_utf8(&[0xc0, 0xae]).is_err());
1817 assert!(from_utf8(&[0xe0, 0x80, 0x80]).is_err());
1818 assert!(from_utf8(&[0xe0, 0x80, 0xaf]).is_err());
1819 assert!(from_utf8(&[0xe0, 0x81, 0x81]).is_err());
1820 assert!(from_utf8(&[0xf0, 0x82, 0x82, 0xac]).is_err());
1821 assert!(from_utf8(&[0xf4, 0x90, 0x80, 0x80]).is_err());
1824 assert!(from_utf8(&[0xED, 0xA0, 0x80]).is_err());
1825 assert!(from_utf8(&[0xED, 0xBF, 0xBF]).is_err());
1827 assert!(from_utf8(&[0xC2, 0x80]).is_ok());
1828 assert!(from_utf8(&[0xDF, 0xBF]).is_ok());
1829 assert!(from_utf8(&[0xE0, 0xA0, 0x80]).is_ok());
1830 assert!(from_utf8(&[0xED, 0x9F, 0xBF]).is_ok());
1831 assert!(from_utf8(&[0xEE, 0x80, 0x80]).is_ok());
1832 assert!(from_utf8(&[0xEF, 0xBF, 0xBF]).is_ok());
1833 assert!(from_utf8(&[0xF0, 0x90, 0x80, 0x80]).is_ok());
1834 assert!(from_utf8(&[0xF4, 0x8F, 0xBF, 0xBF]).is_ok());
1838 fn test_is_utf16() {
1839 use unicode::str::is_utf16;
1840 macro_rules! pos ( ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } });
1848 // surrogate pairs (randomly generated with Python 3's
1849 // .encode('utf-16be'))
1850 pos!(&[0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
1851 &[0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
1852 &[0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);
1854 // mixtures (also random)
1855 pos!(&[0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
1856 &[0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
1857 &[0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);
1860 macro_rules! neg ( ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } });
1863 // surrogate + regular unit
1865 // surrogate + lead surrogate
1867 // unterminated surrogate
1869 // trail surrogate without a lead
1872 // random byte sequences that Python 3's .decode('utf-16be')
1874 neg!(&[0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
1875 &[0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
1876 &[0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
1877 &[0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
1878 &[0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
1879 &[0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
1880 &[0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
1881 &[0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
1882 &[0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
1883 &[0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
1884 &[0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
1885 &[0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
1886 &[0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
1887 &[0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
1888 &[0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
1889 &[0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
1890 &[0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
1891 &[0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
1892 &[0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
1893 &[0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
1894 &[0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
1898 fn test_as_bytes() {
1901 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1902 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1906 assert_eq!("".as_bytes(), b);
1907 assert_eq!("abc".as_bytes(), b"abc");
1908 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
1913 fn test_as_bytes_fail() {
1914 // Don't double free. (I'm not sure if this exercises the
1915 // original problem code path anymore.)
1916 let s = String::from_str("");
1917 let _bytes = s.as_bytes();
1923 let buf = "hello".as_ptr();
1925 assert_eq!(*buf.offset(0), b'h');
1926 assert_eq!(*buf.offset(1), b'e');
1927 assert_eq!(*buf.offset(2), b'l');
1928 assert_eq!(*buf.offset(3), b'l');
1929 assert_eq!(*buf.offset(4), b'o');
1934 fn test_subslice_offset() {
1935 let a = "kernelsprite";
1936 let b = a.slice(7, a.len());
1937 let c = a.slice(0, a.len() - 6);
1938 assert_eq!(a.subslice_offset(b), 7);
1939 assert_eq!(a.subslice_offset(c), 0);
1941 let string = "a\nb\nc";
1942 let lines: Vec<&str> = string.lines().collect();
1943 assert_eq!(string.subslice_offset(lines[0]), 0);
1944 assert_eq!(string.subslice_offset(lines[1]), 2);
1945 assert_eq!(string.subslice_offset(lines[2]), 4);
1950 fn test_subslice_offset_2() {
1951 let a = "alchemiter";
1952 let b = "cruxtruder";
1953 a.subslice_offset(b);
1957 fn vec_str_conversions() {
1958 let s1: String = String::from_str("All mimsy were the borogoves");
1960 let v: Vec<u8> = s1.as_bytes().to_vec();
1961 let s2: String = String::from_str(from_utf8(v.as_slice()).unwrap());
1962 let mut i: uint = 0u;
1963 let n1: uint = s1.len();
1964 let n2: uint = v.len();
1967 let a: u8 = s1.as_bytes()[i];
1968 let b: u8 = s2.as_bytes()[i];
fn test_contains() {
    // (haystack, needle, expected result of `haystack.contains(needle)`)
    let ascii_cases = [
        ("abcde", "bcd", true),
        ("abcde", "abcd", true),
        ("abcde", "bcde", true),
        ("abcde", "", true),
        ("", "", true),
        ("abcde", "def", false),
        ("", "a", false),
    ];
    for &(haystack, needle, expected) in ascii_cases.iter() {
        assert_eq!(haystack.contains(needle), expected);
    }

    // Multi-byte text: substrings at the start, in the middle and across
    // the Thai/Chinese boundary are found; a non-contiguous one is not.
    let data = "ประเทศไทย中华Việt Nam";
    let present = ["ประเ", "ะเ", "中华"];
    for needle in present.iter() {
        assert!(data.contains(*needle));
    }
    assert!(!data.contains("ไท华"));
}
1994 fn test_contains_char() {
1995 assert!("abc".contains_char('b'));
1996 assert!("a".contains_char('a'));
1997 assert!(!"abc".contains_char('d'));
1998 assert!(!"".contains_char('a'));
2003 let s = "ศไทย中华Việt Nam";
2004 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2006 for ch in v.iter() {
2007 assert!(s.char_at(pos) == *ch);
2008 pos += ch.to_string().len();
2013 fn test_char_at_reverse() {
2014 let s = "ศไทย中华Việt Nam";
2015 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2016 let mut pos = s.len();
2017 for ch in v.iter().rev() {
2018 assert!(s.char_at_reverse(pos) == *ch);
2019 pos -= ch.to_string().len();
2024 fn test_escape_unicode() {
2025 assert_eq!("abc".escape_unicode(),
2026 String::from_str("\\u{61}\\u{62}\\u{63}"));
2027 assert_eq!("a c".escape_unicode(),
2028 String::from_str("\\u{61}\\u{20}\\u{63}"));
2029 assert_eq!("\r\n\t".escape_unicode(),
2030 String::from_str("\\u{d}\\u{a}\\u{9}"));
2031 assert_eq!("'\"\\".escape_unicode(),
2032 String::from_str("\\u{27}\\u{22}\\u{5c}"));
2033 assert_eq!("\x00\x01\u{fe}\u{ff}".escape_unicode(),
2034 String::from_str("\\u{0}\\u{1}\\u{fe}\\u{ff}"));
2035 assert_eq!("\u{100}\u{ffff}".escape_unicode(),
2036 String::from_str("\\u{100}\\u{ffff}"));
2037 assert_eq!("\u{10000}\u{10ffff}".escape_unicode(),
2038 String::from_str("\\u{10000}\\u{10ffff}"));
2039 assert_eq!("ab\u{fb00}".escape_unicode(),
2040 String::from_str("\\u{61}\\u{62}\\u{fb00}"));
2041 assert_eq!("\u{1d4ea}\r".escape_unicode(),
2042 String::from_str("\\u{1d4ea}\\u{d}"));
2046 fn test_escape_default() {
2047 assert_eq!("abc".escape_default(), String::from_str("abc"));
2048 assert_eq!("a c".escape_default(), String::from_str("a c"));
2049 assert_eq!("\r\n\t".escape_default(), String::from_str("\\r\\n\\t"));
2050 assert_eq!("'\"\\".escape_default(), String::from_str("\\'\\\"\\\\"));
2051 assert_eq!("\u{100}\u{ffff}".escape_default(),
2052 String::from_str("\\u{100}\\u{ffff}"));
2053 assert_eq!("\u{10000}\u{10ffff}".escape_default(),
2054 String::from_str("\\u{10000}\\u{10ffff}"));
2055 assert_eq!("ab\u{fb00}".escape_default(),
2056 String::from_str("ab\\u{fb00}"));
2057 assert_eq!("\u{1d4ea}\r".escape_default(),
2058 String::from_str("\\u{1d4ea}\\r"));
fn test_total_ord() {
    // Strings order lexicographically by byte. Each comparison is wrapped
    // in an assertion: as bare `a.cmp(b) == X;` statements the results
    // were discarded and the test could never fail.

    // A string is greater than any of its proper prefixes.
    assert_eq!("1234".cmp("123"), Greater);
    assert_eq!("123".cmp("1234"), Less);
    assert_eq!("1234".cmp("1234"), Equal);
    // The first differing byte decides, regardless of total length.
    assert_eq!("12345555".cmp("123456"), Less);
    assert_eq!("22".cmp("1234"), Greater);
}
2071 fn test_char_range_at() {
2072 let data = "b¢€𤭢𤭢€¢b";
2073 assert_eq!('b', data.char_range_at(0).ch);
2074 assert_eq!('¢', data.char_range_at(1).ch);
2075 assert_eq!('€', data.char_range_at(3).ch);
2076 assert_eq!('𤭢', data.char_range_at(6).ch);
2077 assert_eq!('𤭢', data.char_range_at(10).ch);
2078 assert_eq!('€', data.char_range_at(14).ch);
2079 assert_eq!('¢', data.char_range_at(17).ch);
2080 assert_eq!('b', data.char_range_at(19).ch);
2084 fn test_char_range_at_reverse_underflow() {
2085 assert_eq!("abc".char_range_at_reverse(0).next, 0);
2089 fn test_iterator() {
2090 let s = "ศไทย中华Việt Nam";
2091 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2094 let mut it = s.chars();
2097 assert_eq!(c, v[pos]);
2100 assert_eq!(pos, v.len());
2104 fn test_rev_iterator() {
2105 let s = "ศไทย中华Việt Nam";
2106 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2109 let mut it = s.chars().rev();
2112 assert_eq!(c, v[pos]);
2115 assert_eq!(pos, v.len());
2119 fn test_chars_decoding() {
2120 let mut bytes = [0u8; 4];
2121 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2122 let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2123 let s = ::core::str::from_utf8(bytes[..len]).unwrap();
2124 if Some(c) != s.chars().next() {
2125 panic!("character {:x}={} does not decode correctly", c as u32, c);
2131 fn test_chars_rev_decoding() {
2132 let mut bytes = [0u8; 4];
2133 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2134 let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2135 let s = ::core::str::from_utf8(bytes[..len]).unwrap();
2136 if Some(c) != s.chars().rev().next() {
2137 panic!("character {:x}={} does not decode correctly", c as u32, c);
2143 fn test_iterator_clone() {
2144 let s = "ศไทย中华Việt Nam";
2145 let mut it = s.chars();
2147 assert!(it.zip(it.clone()).all(|(x,y)| x == y));
2151 fn test_bytesator() {
2152 let s = "ศไทย中华Việt Nam";
2154 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2155 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2160 for b in s.bytes() {
2161 assert_eq!(b, v[pos]);
2167 fn test_bytes_revator() {
2168 let s = "ศไทย中华Việt Nam";
2170 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2171 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2174 let mut pos = v.len();
2176 for b in s.bytes().rev() {
2178 assert_eq!(b, v[pos]);
2183 fn test_char_indicesator() {
2184 let s = "ศไทย中华Việt Nam";
2185 let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
2186 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2189 let mut it = s.char_indices();
2192 assert_eq!(c, (p[pos], v[pos]));
2195 assert_eq!(pos, v.len());
2196 assert_eq!(pos, p.len());
2200 fn test_char_indices_revator() {
2201 let s = "ศไทย中华Việt Nam";
2202 let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
2203 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2206 let mut it = s.char_indices().rev();
2209 assert_eq!(c, (p[pos], v[pos]));
2212 assert_eq!(pos, v.len());
2213 assert_eq!(pos, p.len());
2217 fn test_splitn_char_iterator() {
2218 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2220 let split: Vec<&str> = data.splitn(3, ' ').collect();
2221 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2223 let split: Vec<&str> = data.splitn(3, |&: c: char| c == ' ').collect();
2224 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2227 let split: Vec<&str> = data.splitn(3, 'ä').collect();
2228 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
2230 let split: Vec<&str> = data.splitn(3, |&: c: char| c == 'ä').collect();
2231 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
2235 fn test_split_char_iterator_no_trailing() {
2236 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2238 let split: Vec<&str> = data.split('\n').collect();
2239 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
2241 let split: Vec<&str> = data.split_terminator('\n').collect();
2242 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
2247 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
2248 let words: Vec<&str> = data.words().collect();
2249 assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
2253 fn test_nfd_chars() {
2255 ($input: expr, $expected: expr) => {
2256 assert_eq!($input.nfd_chars().collect::<String>(), $expected);
2260 t!("\u{1e0b}\u{1c4}", "d\u{307}\u{1c4}");
2261 t!("\u{2026}", "\u{2026}");
2262 t!("\u{2126}", "\u{3a9}");
2263 t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
2264 t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
2265 t!("a\u{301}", "a\u{301}");
2266 t!("\u{301}a", "\u{301}a");
2267 t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
2268 t!("\u{ac1c}", "\u{1100}\u{1162}");
2272 fn test_nfkd_chars() {
2274 ($input: expr, $expected: expr) => {
2275 assert_eq!($input.nfkd_chars().collect::<String>(), $expected);
2279 t!("\u{1e0b}\u{1c4}", "d\u{307}DZ\u{30c}");
2280 t!("\u{2026}", "...");
2281 t!("\u{2126}", "\u{3a9}");
2282 t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
2283 t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
2284 t!("a\u{301}", "a\u{301}");
2285 t!("\u{301}a", "\u{301}a");
2286 t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
2287 t!("\u{ac1c}", "\u{1100}\u{1162}");
2291 fn test_nfc_chars() {
2293 ($input: expr, $expected: expr) => {
2294 assert_eq!($input.nfc_chars().collect::<String>(), $expected);
2298 t!("\u{1e0b}\u{1c4}", "\u{1e0b}\u{1c4}");
2299 t!("\u{2026}", "\u{2026}");
2300 t!("\u{2126}", "\u{3a9}");
2301 t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
2302 t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
2303 t!("a\u{301}", "\u{e1}");
2304 t!("\u{301}a", "\u{301}a");
2305 t!("\u{d4db}", "\u{d4db}");
2306 t!("\u{ac1c}", "\u{ac1c}");
2307 t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
2311 fn test_nfkc_chars() {
2313 ($input: expr, $expected: expr) => {
2314 assert_eq!($input.nfkc_chars().collect::<String>(), $expected);
2318 t!("\u{1e0b}\u{1c4}", "\u{1e0b}D\u{17d}");
2319 t!("\u{2026}", "...");
2320 t!("\u{2126}", "\u{3a9}");
2321 t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
2322 t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
2323 t!("a\u{301}", "\u{e1}");
2324 t!("\u{301}a", "\u{301}a");
2325 t!("\u{d4db}", "\u{d4db}");
2326 t!("\u{ac1c}", "\u{ac1c}");
2327 t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
2332 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
2333 let lines: Vec<&str> = data.lines().collect();
2334 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2336 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
2337 let lines: Vec<&str> = data.lines().collect();
2338 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2342 fn test_graphemes() {
2343 use core::iter::order;
2344 // official Unicode test data
2345 // from http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
2346 let test_same: [(_, &[_]); 325] = [
2347 ("\u{20}\u{20}", &["\u{20}", "\u{20}"]),
2348 ("\u{20}\u{308}\u{20}", &["\u{20}\u{308}", "\u{20}"]),
2349 ("\u{20}\u{D}", &["\u{20}", "\u{D}"]),
2350 ("\u{20}\u{308}\u{D}", &["\u{20}\u{308}", "\u{D}"]),
2351 ("\u{20}\u{A}", &["\u{20}", "\u{A}"]),
2352 ("\u{20}\u{308}\u{A}", &["\u{20}\u{308}", "\u{A}"]),
2353 ("\u{20}\u{1}", &["\u{20}", "\u{1}"]),
2354 ("\u{20}\u{308}\u{1}", &["\u{20}\u{308}", "\u{1}"]),
2355 ("\u{20}\u{300}", &["\u{20}\u{300}"]),
2356 ("\u{20}\u{308}\u{300}", &["\u{20}\u{308}\u{300}"]),
2357 ("\u{20}\u{1100}", &["\u{20}", "\u{1100}"]),
2358 ("\u{20}\u{308}\u{1100}", &["\u{20}\u{308}", "\u{1100}"]),
2359 ("\u{20}\u{1160}", &["\u{20}", "\u{1160}"]),
2360 ("\u{20}\u{308}\u{1160}", &["\u{20}\u{308}", "\u{1160}"]),
2361 ("\u{20}\u{11A8}", &["\u{20}", "\u{11A8}"]),
2362 ("\u{20}\u{308}\u{11A8}", &["\u{20}\u{308}", "\u{11A8}"]),
2363 ("\u{20}\u{AC00}", &["\u{20}", "\u{AC00}"]),
2364 ("\u{20}\u{308}\u{AC00}", &["\u{20}\u{308}", "\u{AC00}"]),
2365 ("\u{20}\u{AC01}", &["\u{20}", "\u{AC01}"]),
2366 ("\u{20}\u{308}\u{AC01}", &["\u{20}\u{308}", "\u{AC01}"]),
2367 ("\u{20}\u{1F1E6}", &["\u{20}", "\u{1F1E6}"]),
2368 ("\u{20}\u{308}\u{1F1E6}", &["\u{20}\u{308}", "\u{1F1E6}"]),
2369 ("\u{20}\u{378}", &["\u{20}", "\u{378}"]),
2370 ("\u{20}\u{308}\u{378}", &["\u{20}\u{308}", "\u{378}"]),
2371 ("\u{D}\u{20}", &["\u{D}", "\u{20}"]),
2372 ("\u{D}\u{308}\u{20}", &["\u{D}", "\u{308}", "\u{20}"]),
2373 ("\u{D}\u{D}", &["\u{D}", "\u{D}"]),
2374 ("\u{D}\u{308}\u{D}", &["\u{D}", "\u{308}", "\u{D}"]),
2375 ("\u{D}\u{A}", &["\u{D}\u{A}"]),
2376 ("\u{D}\u{308}\u{A}", &["\u{D}", "\u{308}", "\u{A}"]),
2377 ("\u{D}\u{1}", &["\u{D}", "\u{1}"]),
2378 ("\u{D}\u{308}\u{1}", &["\u{D}", "\u{308}", "\u{1}"]),
2379 ("\u{D}\u{300}", &["\u{D}", "\u{300}"]),
2380 ("\u{D}\u{308}\u{300}", &["\u{D}", "\u{308}\u{300}"]),
2381 ("\u{D}\u{903}", &["\u{D}", "\u{903}"]),
2382 ("\u{D}\u{1100}", &["\u{D}", "\u{1100}"]),
2383 ("\u{D}\u{308}\u{1100}", &["\u{D}", "\u{308}", "\u{1100}"]),
2384 ("\u{D}\u{1160}", &["\u{D}", "\u{1160}"]),
2385 ("\u{D}\u{308}\u{1160}", &["\u{D}", "\u{308}", "\u{1160}"]),
2386 ("\u{D}\u{11A8}", &["\u{D}", "\u{11A8}"]),
2387 ("\u{D}\u{308}\u{11A8}", &["\u{D}", "\u{308}", "\u{11A8}"]),
2388 ("\u{D}\u{AC00}", &["\u{D}", "\u{AC00}"]),
2389 ("\u{D}\u{308}\u{AC00}", &["\u{D}", "\u{308}", "\u{AC00}"]),
2390 ("\u{D}\u{AC01}", &["\u{D}", "\u{AC01}"]),
2391 ("\u{D}\u{308}\u{AC01}", &["\u{D}", "\u{308}", "\u{AC01}"]),
2392 ("\u{D}\u{1F1E6}", &["\u{D}", "\u{1F1E6}"]),
2393 ("\u{D}\u{308}\u{1F1E6}", &["\u{D}", "\u{308}", "\u{1F1E6}"]),
2394 ("\u{D}\u{378}", &["\u{D}", "\u{378}"]),
2395 ("\u{D}\u{308}\u{378}", &["\u{D}", "\u{308}", "\u{378}"]),
2396 ("\u{A}\u{20}", &["\u{A}", "\u{20}"]),
2397 ("\u{A}\u{308}\u{20}", &["\u{A}", "\u{308}", "\u{20}"]),
2398 ("\u{A}\u{D}", &["\u{A}", "\u{D}"]),
2399 ("\u{A}\u{308}\u{D}", &["\u{A}", "\u{308}", "\u{D}"]),
2400 ("\u{A}\u{A}", &["\u{A}", "\u{A}"]),
2401 ("\u{A}\u{308}\u{A}", &["\u{A}", "\u{308}", "\u{A}"]),
2402 ("\u{A}\u{1}", &["\u{A}", "\u{1}"]),
2403 ("\u{A}\u{308}\u{1}", &["\u{A}", "\u{308}", "\u{1}"]),
2404 ("\u{A}\u{300}", &["\u{A}", "\u{300}"]),
2405 ("\u{A}\u{308}\u{300}", &["\u{A}", "\u{308}\u{300}"]),
2406 ("\u{A}\u{903}", &["\u{A}", "\u{903}"]),
2407 ("\u{A}\u{1100}", &["\u{A}", "\u{1100}"]),
2408 ("\u{A}\u{308}\u{1100}", &["\u{A}", "\u{308}", "\u{1100}"]),
2409 ("\u{A}\u{1160}", &["\u{A}", "\u{1160}"]),
2410 ("\u{A}\u{308}\u{1160}", &["\u{A}", "\u{308}", "\u{1160}"]),
2411 ("\u{A}\u{11A8}", &["\u{A}", "\u{11A8}"]),
2412 ("\u{A}\u{308}\u{11A8}", &["\u{A}", "\u{308}", "\u{11A8}"]),
2413 ("\u{A}\u{AC00}", &["\u{A}", "\u{AC00}"]),
2414 ("\u{A}\u{308}\u{AC00}", &["\u{A}", "\u{308}", "\u{AC00}"]),
2415 ("\u{A}\u{AC01}", &["\u{A}", "\u{AC01}"]),
2416 ("\u{A}\u{308}\u{AC01}", &["\u{A}", "\u{308}", "\u{AC01}"]),
2417 ("\u{A}\u{1F1E6}", &["\u{A}", "\u{1F1E6}"]),
2418 ("\u{A}\u{308}\u{1F1E6}", &["\u{A}", "\u{308}", "\u{1F1E6}"]),
2419 ("\u{A}\u{378}", &["\u{A}", "\u{378}"]),
2420 ("\u{A}\u{308}\u{378}", &["\u{A}", "\u{308}", "\u{378}"]),
2421 ("\u{1}\u{20}", &["\u{1}", "\u{20}"]),
2422 ("\u{1}\u{308}\u{20}", &["\u{1}", "\u{308}", "\u{20}"]),
2423 ("\u{1}\u{D}", &["\u{1}", "\u{D}"]),
2424 ("\u{1}\u{308}\u{D}", &["\u{1}", "\u{308}", "\u{D}"]),
2425 ("\u{1}\u{A}", &["\u{1}", "\u{A}"]),
2426 ("\u{1}\u{308}\u{A}", &["\u{1}", "\u{308}", "\u{A}"]),
2427 ("\u{1}\u{1}", &["\u{1}", "\u{1}"]),
2428 ("\u{1}\u{308}\u{1}", &["\u{1}", "\u{308}", "\u{1}"]),
2429 ("\u{1}\u{300}", &["\u{1}", "\u{300}"]),
2430 ("\u{1}\u{308}\u{300}", &["\u{1}", "\u{308}\u{300}"]),
2431 ("\u{1}\u{903}", &["\u{1}", "\u{903}"]),
2432 ("\u{1}\u{1100}", &["\u{1}", "\u{1100}"]),
2433 ("\u{1}\u{308}\u{1100}", &["\u{1}", "\u{308}", "\u{1100}"]),
2434 ("\u{1}\u{1160}", &["\u{1}", "\u{1160}"]),
2435 ("\u{1}\u{308}\u{1160}", &["\u{1}", "\u{308}", "\u{1160}"]),
2436 ("\u{1}\u{11A8}", &["\u{1}", "\u{11A8}"]),
2437 ("\u{1}\u{308}\u{11A8}", &["\u{1}", "\u{308}", "\u{11A8}"]),
2438 ("\u{1}\u{AC00}", &["\u{1}", "\u{AC00}"]),
2439 ("\u{1}\u{308}\u{AC00}", &["\u{1}", "\u{308}", "\u{AC00}"]),
2440 ("\u{1}\u{AC01}", &["\u{1}", "\u{AC01}"]),
2441 ("\u{1}\u{308}\u{AC01}", &["\u{1}", "\u{308}", "\u{AC01}"]),
2442 ("\u{1}\u{1F1E6}", &["\u{1}", "\u{1F1E6}"]),
2443 ("\u{1}\u{308}\u{1F1E6}", &["\u{1}", "\u{308}", "\u{1F1E6}"]),
2444 ("\u{1}\u{378}", &["\u{1}", "\u{378}"]),
2445 ("\u{1}\u{308}\u{378}", &["\u{1}", "\u{308}", "\u{378}"]),
2446 ("\u{300}\u{20}", &["\u{300}", "\u{20}"]),
2447 ("\u{300}\u{308}\u{20}", &["\u{300}\u{308}", "\u{20}"]),
2448 ("\u{300}\u{D}", &["\u{300}", "\u{D}"]),
2449 ("\u{300}\u{308}\u{D}", &["\u{300}\u{308}", "\u{D}"]),
2450 ("\u{300}\u{A}", &["\u{300}", "\u{A}"]),
2451 ("\u{300}\u{308}\u{A}", &["\u{300}\u{308}", "\u{A}"]),
2452 ("\u{300}\u{1}", &["\u{300}", "\u{1}"]),
2453 ("\u{300}\u{308}\u{1}", &["\u{300}\u{308}", "\u{1}"]),
2454 ("\u{300}\u{300}", &["\u{300}\u{300}"]),
2455 ("\u{300}\u{308}\u{300}", &["\u{300}\u{308}\u{300}"]),
2456 ("\u{300}\u{1100}", &["\u{300}", "\u{1100}"]),
2457 ("\u{300}\u{308}\u{1100}", &["\u{300}\u{308}", "\u{1100}"]),
2458 ("\u{300}\u{1160}", &["\u{300}", "\u{1160}"]),
2459 ("\u{300}\u{308}\u{1160}", &["\u{300}\u{308}", "\u{1160}"]),
2460 ("\u{300}\u{11A8}", &["\u{300}", "\u{11A8}"]),
2461 ("\u{300}\u{308}\u{11A8}", &["\u{300}\u{308}", "\u{11A8}"]),
2462 ("\u{300}\u{AC00}", &["\u{300}", "\u{AC00}"]),
2463 ("\u{300}\u{308}\u{AC00}", &["\u{300}\u{308}", "\u{AC00}"]),
2464 ("\u{300}\u{AC01}", &["\u{300}", "\u{AC01}"]),
2465 ("\u{300}\u{308}\u{AC01}", &["\u{300}\u{308}", "\u{AC01}"]),
2466 ("\u{300}\u{1F1E6}", &["\u{300}", "\u{1F1E6}"]),
2467 ("\u{300}\u{308}\u{1F1E6}", &["\u{300}\u{308}", "\u{1F1E6}"]),
2468 ("\u{300}\u{378}", &["\u{300}", "\u{378}"]),
2469 ("\u{300}\u{308}\u{378}", &["\u{300}\u{308}", "\u{378}"]),
2470 ("\u{903}\u{20}", &["\u{903}", "\u{20}"]),
2471 ("\u{903}\u{308}\u{20}", &["\u{903}\u{308}", "\u{20}"]),
2472 ("\u{903}\u{D}", &["\u{903}", "\u{D}"]),
2473 ("\u{903}\u{308}\u{D}", &["\u{903}\u{308}", "\u{D}"]),
2474 ("\u{903}\u{A}", &["\u{903}", "\u{A}"]),
2475 ("\u{903}\u{308}\u{A}", &["\u{903}\u{308}", "\u{A}"]),
2476 ("\u{903}\u{1}", &["\u{903}", "\u{1}"]),
2477 ("\u{903}\u{308}\u{1}", &["\u{903}\u{308}", "\u{1}"]),
2478 ("\u{903}\u{300}", &["\u{903}\u{300}"]),
2479 ("\u{903}\u{308}\u{300}", &["\u{903}\u{308}\u{300}"]),
2480 ("\u{903}\u{1100}", &["\u{903}", "\u{1100}"]),
2481 ("\u{903}\u{308}\u{1100}", &["\u{903}\u{308}", "\u{1100}"]),
2482 ("\u{903}\u{1160}", &["\u{903}", "\u{1160}"]),
2483 ("\u{903}\u{308}\u{1160}", &["\u{903}\u{308}", "\u{1160}"]),
2484 ("\u{903}\u{11A8}", &["\u{903}", "\u{11A8}"]),
2485 ("\u{903}\u{308}\u{11A8}", &["\u{903}\u{308}", "\u{11A8}"]),
2486 ("\u{903}\u{AC00}", &["\u{903}", "\u{AC00}"]),
2487 ("\u{903}\u{308}\u{AC00}", &["\u{903}\u{308}", "\u{AC00}"]),
2488 ("\u{903}\u{AC01}", &["\u{903}", "\u{AC01}"]),
2489 ("\u{903}\u{308}\u{AC01}", &["\u{903}\u{308}", "\u{AC01}"]),
2490 ("\u{903}\u{1F1E6}", &["\u{903}", "\u{1F1E6}"]),
2491 ("\u{903}\u{308}\u{1F1E6}", &["\u{903}\u{308}", "\u{1F1E6}"]),
2492 ("\u{903}\u{378}", &["\u{903}", "\u{378}"]),
2493 ("\u{903}\u{308}\u{378}", &["\u{903}\u{308}", "\u{378}"]),
2494 ("\u{1100}\u{20}", &["\u{1100}", "\u{20}"]),
2495 ("\u{1100}\u{308}\u{20}", &["\u{1100}\u{308}", "\u{20}"]),
2496 ("\u{1100}\u{D}", &["\u{1100}", "\u{D}"]),
2497 ("\u{1100}\u{308}\u{D}", &["\u{1100}\u{308}", "\u{D}"]),
2498 ("\u{1100}\u{A}", &["\u{1100}", "\u{A}"]),
2499 ("\u{1100}\u{308}\u{A}", &["\u{1100}\u{308}", "\u{A}"]),
2500 ("\u{1100}\u{1}", &["\u{1100}", "\u{1}"]),
2501 ("\u{1100}\u{308}\u{1}", &["\u{1100}\u{308}", "\u{1}"]),
2502 ("\u{1100}\u{300}", &["\u{1100}\u{300}"]),
2503 ("\u{1100}\u{308}\u{300}", &["\u{1100}\u{308}\u{300}"]),
2504 ("\u{1100}\u{1100}", &["\u{1100}\u{1100}"]),
2505 ("\u{1100}\u{308}\u{1100}", &["\u{1100}\u{308}", "\u{1100}"]),
2506 ("\u{1100}\u{1160}", &["\u{1100}\u{1160}"]),
2507 ("\u{1100}\u{308}\u{1160}", &["\u{1100}\u{308}", "\u{1160}"]),
2508 ("\u{1100}\u{11A8}", &["\u{1100}", "\u{11A8}"]),
2509 ("\u{1100}\u{308}\u{11A8}", &["\u{1100}\u{308}", "\u{11A8}"]),
2510 ("\u{1100}\u{AC00}", &["\u{1100}\u{AC00}"]),
2511 ("\u{1100}\u{308}\u{AC00}", &["\u{1100}\u{308}", "\u{AC00}"]),
2512 ("\u{1100}\u{AC01}", &["\u{1100}\u{AC01}"]),
2513 ("\u{1100}\u{308}\u{AC01}", &["\u{1100}\u{308}", "\u{AC01}"]),
2514 ("\u{1100}\u{1F1E6}", &["\u{1100}", "\u{1F1E6}"]),
2515 ("\u{1100}\u{308}\u{1F1E6}", &["\u{1100}\u{308}", "\u{1F1E6}"]),
2516 ("\u{1100}\u{378}", &["\u{1100}", "\u{378}"]),
2517 ("\u{1100}\u{308}\u{378}", &["\u{1100}\u{308}", "\u{378}"]),
2518 ("\u{1160}\u{20}", &["\u{1160}", "\u{20}"]),
2519 ("\u{1160}\u{308}\u{20}", &["\u{1160}\u{308}", "\u{20}"]),
2520 ("\u{1160}\u{D}", &["\u{1160}", "\u{D}"]),
2521 ("\u{1160}\u{308}\u{D}", &["\u{1160}\u{308}", "\u{D}"]),
2522 ("\u{1160}\u{A}", &["\u{1160}", "\u{A}"]),
2523 ("\u{1160}\u{308}\u{A}", &["\u{1160}\u{308}", "\u{A}"]),
2524 ("\u{1160}\u{1}", &["\u{1160}", "\u{1}"]),
2525 ("\u{1160}\u{308}\u{1}", &["\u{1160}\u{308}", "\u{1}"]),
2526 ("\u{1160}\u{300}", &["\u{1160}\u{300}"]),
2527 ("\u{1160}\u{308}\u{300}", &["\u{1160}\u{308}\u{300}"]),
2528 ("\u{1160}\u{1100}", &["\u{1160}", "\u{1100}"]),
2529 ("\u{1160}\u{308}\u{1100}", &["\u{1160}\u{308}", "\u{1100}"]),
2530 ("\u{1160}\u{1160}", &["\u{1160}\u{1160}"]),
2531 ("\u{1160}\u{308}\u{1160}", &["\u{1160}\u{308}", "\u{1160}"]),
2532 ("\u{1160}\u{11A8}", &["\u{1160}\u{11A8}"]),
2533 ("\u{1160}\u{308}\u{11A8}", &["\u{1160}\u{308}", "\u{11A8}"]),
2534 ("\u{1160}\u{AC00}", &["\u{1160}", "\u{AC00}"]),
2535 ("\u{1160}\u{308}\u{AC00}", &["\u{1160}\u{308}", "\u{AC00}"]),
2536 ("\u{1160}\u{AC01}", &["\u{1160}", "\u{AC01}"]),
2537 ("\u{1160}\u{308}\u{AC01}", &["\u{1160}\u{308}", "\u{AC01}"]),
2538 ("\u{1160}\u{1F1E6}", &["\u{1160}", "\u{1F1E6}"]),
2539 ("\u{1160}\u{308}\u{1F1E6}", &["\u{1160}\u{308}", "\u{1F1E6}"]),
2540 ("\u{1160}\u{378}", &["\u{1160}", "\u{378}"]),
2541 ("\u{1160}\u{308}\u{378}", &["\u{1160}\u{308}", "\u{378}"]),
2542 ("\u{11A8}\u{20}", &["\u{11A8}", "\u{20}"]),
2543 ("\u{11A8}\u{308}\u{20}", &["\u{11A8}\u{308}", "\u{20}"]),
2544 ("\u{11A8}\u{D}", &["\u{11A8}", "\u{D}"]),
2545 ("\u{11A8}\u{308}\u{D}", &["\u{11A8}\u{308}", "\u{D}"]),
2546 ("\u{11A8}\u{A}", &["\u{11A8}", "\u{A}"]),
2547 ("\u{11A8}\u{308}\u{A}", &["\u{11A8}\u{308}", "\u{A}"]),
2548 ("\u{11A8}\u{1}", &["\u{11A8}", "\u{1}"]),
2549 ("\u{11A8}\u{308}\u{1}", &["\u{11A8}\u{308}", "\u{1}"]),
2550 ("\u{11A8}\u{300}", &["\u{11A8}\u{300}"]),
2551 ("\u{11A8}\u{308}\u{300}", &["\u{11A8}\u{308}\u{300}"]),
2552 ("\u{11A8}\u{1100}", &["\u{11A8}", "\u{1100}"]),
2553 ("\u{11A8}\u{308}\u{1100}", &["\u{11A8}\u{308}", "\u{1100}"]),
2554 ("\u{11A8}\u{1160}", &["\u{11A8}", "\u{1160}"]),
2555 ("\u{11A8}\u{308}\u{1160}", &["\u{11A8}\u{308}", "\u{1160}"]),
2556 ("\u{11A8}\u{11A8}", &["\u{11A8}\u{11A8}"]),
2557 ("\u{11A8}\u{308}\u{11A8}", &["\u{11A8}\u{308}", "\u{11A8}"]),
2558 ("\u{11A8}\u{AC00}", &["\u{11A8}", "\u{AC00}"]),
2559 ("\u{11A8}\u{308}\u{AC00}", &["\u{11A8}\u{308}", "\u{AC00}"]),
2560 ("\u{11A8}\u{AC01}", &["\u{11A8}", "\u{AC01}"]),
2561 ("\u{11A8}\u{308}\u{AC01}", &["\u{11A8}\u{308}", "\u{AC01}"]),
2562 ("\u{11A8}\u{1F1E6}", &["\u{11A8}", "\u{1F1E6}"]),
2563 ("\u{11A8}\u{308}\u{1F1E6}", &["\u{11A8}\u{308}", "\u{1F1E6}"]),
2564 ("\u{11A8}\u{378}", &["\u{11A8}", "\u{378}"]),
2565 ("\u{11A8}\u{308}\u{378}", &["\u{11A8}\u{308}", "\u{378}"]),
2566 ("\u{AC00}\u{20}", &["\u{AC00}", "\u{20}"]),
2567 ("\u{AC00}\u{308}\u{20}", &["\u{AC00}\u{308}", "\u{20}"]),
2568 ("\u{AC00}\u{D}", &["\u{AC00}", "\u{D}"]),
2569 ("\u{AC00}\u{308}\u{D}", &["\u{AC00}\u{308}", "\u{D}"]),
2570 ("\u{AC00}\u{A}", &["\u{AC00}", "\u{A}"]),
2571 ("\u{AC00}\u{308}\u{A}", &["\u{AC00}\u{308}", "\u{A}"]),
2572 ("\u{AC00}\u{1}", &["\u{AC00}", "\u{1}"]),
2573 ("\u{AC00}\u{308}\u{1}", &["\u{AC00}\u{308}", "\u{1}"]),
2574 ("\u{AC00}\u{300}", &["\u{AC00}\u{300}"]),
2575 ("\u{AC00}\u{308}\u{300}", &["\u{AC00}\u{308}\u{300}"]),
2576 ("\u{AC00}\u{1100}", &["\u{AC00}", "\u{1100}"]),
2577 ("\u{AC00}\u{308}\u{1100}", &["\u{AC00}\u{308}", "\u{1100}"]),
2578 ("\u{AC00}\u{1160}", &["\u{AC00}\u{1160}"]),
2579 ("\u{AC00}\u{308}\u{1160}", &["\u{AC00}\u{308}", "\u{1160}"]),
2580 ("\u{AC00}\u{11A8}", &["\u{AC00}\u{11A8}"]),
2581 ("\u{AC00}\u{308}\u{11A8}", &["\u{AC00}\u{308}", "\u{11A8}"]),
2582 ("\u{AC00}\u{AC00}", &["\u{AC00}", "\u{AC00}"]),
2583 ("\u{AC00}\u{308}\u{AC00}", &["\u{AC00}\u{308}", "\u{AC00}"]),
2584 ("\u{AC00}\u{AC01}", &["\u{AC00}", "\u{AC01}"]),
2585 ("\u{AC00}\u{308}\u{AC01}", &["\u{AC00}\u{308}", "\u{AC01}"]),
2586 ("\u{AC00}\u{1F1E6}", &["\u{AC00}", "\u{1F1E6}"]),
2587 ("\u{AC00}\u{308}\u{1F1E6}", &["\u{AC00}\u{308}", "\u{1F1E6}"]),
2588 ("\u{AC00}\u{378}", &["\u{AC00}", "\u{378}"]),
2589 ("\u{AC00}\u{308}\u{378}", &["\u{AC00}\u{308}", "\u{378}"]),
2590 ("\u{AC01}\u{20}", &["\u{AC01}", "\u{20}"]),
2591 ("\u{AC01}\u{308}\u{20}", &["\u{AC01}\u{308}", "\u{20}"]),
2592 ("\u{AC01}\u{D}", &["\u{AC01}", "\u{D}"]),
2593 ("\u{AC01}\u{308}\u{D}", &["\u{AC01}\u{308}", "\u{D}"]),
2594 ("\u{AC01}\u{A}", &["\u{AC01}", "\u{A}"]),
2595 ("\u{AC01}\u{308}\u{A}", &["\u{AC01}\u{308}", "\u{A}"]),
2596 ("\u{AC01}\u{1}", &["\u{AC01}", "\u{1}"]),
2597 ("\u{AC01}\u{308}\u{1}", &["\u{AC01}\u{308}", "\u{1}"]),
2598 ("\u{AC01}\u{300}", &["\u{AC01}\u{300}"]),
2599 ("\u{AC01}\u{308}\u{300}", &["\u{AC01}\u{308}\u{300}"]),
2600 ("\u{AC01}\u{1100}", &["\u{AC01}", "\u{1100}"]),
2601 ("\u{AC01}\u{308}\u{1100}", &["\u{AC01}\u{308}", "\u{1100}"]),
2602 ("\u{AC01}\u{1160}", &["\u{AC01}", "\u{1160}"]),
2603 ("\u{AC01}\u{308}\u{1160}", &["\u{AC01}\u{308}", "\u{1160}"]),
2604 ("\u{AC01}\u{11A8}", &["\u{AC01}\u{11A8}"]),
2605 ("\u{AC01}\u{308}\u{11A8}", &["\u{AC01}\u{308}", "\u{11A8}"]),
2606 ("\u{AC01}\u{AC00}", &["\u{AC01}", "\u{AC00}"]),
2607 ("\u{AC01}\u{308}\u{AC00}", &["\u{AC01}\u{308}", "\u{AC00}"]),
2608 ("\u{AC01}\u{AC01}", &["\u{AC01}", "\u{AC01}"]),
2609 ("\u{AC01}\u{308}\u{AC01}", &["\u{AC01}\u{308}", "\u{AC01}"]),
2610 ("\u{AC01}\u{1F1E6}", &["\u{AC01}", "\u{1F1E6}"]),
2611 ("\u{AC01}\u{308}\u{1F1E6}", &["\u{AC01}\u{308}", "\u{1F1E6}"]),
2612 ("\u{AC01}\u{378}", &["\u{AC01}", "\u{378}"]),
2613 ("\u{AC01}\u{308}\u{378}", &["\u{AC01}\u{308}", "\u{378}"]),
2614 ("\u{1F1E6}\u{20}", &["\u{1F1E6}", "\u{20}"]),
2615 ("\u{1F1E6}\u{308}\u{20}", &["\u{1F1E6}\u{308}", "\u{20}"]),
2616 ("\u{1F1E6}\u{D}", &["\u{1F1E6}", "\u{D}"]),
2617 ("\u{1F1E6}\u{308}\u{D}", &["\u{1F1E6}\u{308}", "\u{D}"]),
2618 ("\u{1F1E6}\u{A}", &["\u{1F1E6}", "\u{A}"]),
2619 ("\u{1F1E6}\u{308}\u{A}", &["\u{1F1E6}\u{308}", "\u{A}"]),
2620 ("\u{1F1E6}\u{1}", &["\u{1F1E6}", "\u{1}"]),
2621 ("\u{1F1E6}\u{308}\u{1}", &["\u{1F1E6}\u{308}", "\u{1}"]),
2622 ("\u{1F1E6}\u{300}", &["\u{1F1E6}\u{300}"]),
2623 ("\u{1F1E6}\u{308}\u{300}", &["\u{1F1E6}\u{308}\u{300}"]),
2624 ("\u{1F1E6}\u{1100}", &["\u{1F1E6}", "\u{1100}"]),
2625 ("\u{1F1E6}\u{308}\u{1100}", &["\u{1F1E6}\u{308}", "\u{1100}"]),
2626 ("\u{1F1E6}\u{1160}", &["\u{1F1E6}", "\u{1160}"]),
2627 ("\u{1F1E6}\u{308}\u{1160}", &["\u{1F1E6}\u{308}", "\u{1160}"]),
2628 ("\u{1F1E6}\u{11A8}", &["\u{1F1E6}", "\u{11A8}"]),
2629 ("\u{1F1E6}\u{308}\u{11A8}", &["\u{1F1E6}\u{308}", "\u{11A8}"]),
2630 ("\u{1F1E6}\u{AC00}", &["\u{1F1E6}", "\u{AC00}"]),
2631 ("\u{1F1E6}\u{308}\u{AC00}", &["\u{1F1E6}\u{308}", "\u{AC00}"]),
2632 ("\u{1F1E6}\u{AC01}", &["\u{1F1E6}", "\u{AC01}"]),
2633 ("\u{1F1E6}\u{308}\u{AC01}", &["\u{1F1E6}\u{308}", "\u{AC01}"]),
2634 ("\u{1F1E6}\u{1F1E6}", &["\u{1F1E6}\u{1F1E6}"]),
2635 ("\u{1F1E6}\u{308}\u{1F1E6}", &["\u{1F1E6}\u{308}", "\u{1F1E6}"]),
2636 ("\u{1F1E6}\u{378}", &["\u{1F1E6}", "\u{378}"]),
2637 ("\u{1F1E6}\u{308}\u{378}", &["\u{1F1E6}\u{308}", "\u{378}"]),
2638 ("\u{378}\u{20}", &["\u{378}", "\u{20}"]),
2639 ("\u{378}\u{308}\u{20}", &["\u{378}\u{308}", "\u{20}"]),
2640 ("\u{378}\u{D}", &["\u{378}", "\u{D}"]),
2641 ("\u{378}\u{308}\u{D}", &["\u{378}\u{308}", "\u{D}"]),
2642 ("\u{378}\u{A}", &["\u{378}", "\u{A}"]),
2643 ("\u{378}\u{308}\u{A}", &["\u{378}\u{308}", "\u{A}"]),
2644 ("\u{378}\u{1}", &["\u{378}", "\u{1}"]),
2645 ("\u{378}\u{308}\u{1}", &["\u{378}\u{308}", "\u{1}"]),
2646 ("\u{378}\u{300}", &["\u{378}\u{300}"]),
2647 ("\u{378}\u{308}\u{300}", &["\u{378}\u{308}\u{300}"]),
2648 ("\u{378}\u{1100}", &["\u{378}", "\u{1100}"]),
2649 ("\u{378}\u{308}\u{1100}", &["\u{378}\u{308}", "\u{1100}"]),
2650 ("\u{378}\u{1160}", &["\u{378}", "\u{1160}"]),
2651 ("\u{378}\u{308}\u{1160}", &["\u{378}\u{308}", "\u{1160}"]),
2652 ("\u{378}\u{11A8}", &["\u{378}", "\u{11A8}"]),
2653 ("\u{378}\u{308}\u{11A8}", &["\u{378}\u{308}", "\u{11A8}"]),
2654 ("\u{378}\u{AC00}", &["\u{378}", "\u{AC00}"]),
2655 ("\u{378}\u{308}\u{AC00}", &["\u{378}\u{308}", "\u{AC00}"]),
2656 ("\u{378}\u{AC01}", &["\u{378}", "\u{AC01}"]),
2657 ("\u{378}\u{308}\u{AC01}", &["\u{378}\u{308}", "\u{AC01}"]),
2658 ("\u{378}\u{1F1E6}", &["\u{378}", "\u{1F1E6}"]),
2659 ("\u{378}\u{308}\u{1F1E6}", &["\u{378}\u{308}", "\u{1F1E6}"]),
2660 ("\u{378}\u{378}", &["\u{378}", "\u{378}"]),
2661 ("\u{378}\u{308}\u{378}", &["\u{378}\u{308}", "\u{378}"]),
2662 ("\u{61}\u{1F1E6}\u{62}", &["\u{61}", "\u{1F1E6}", "\u{62}"]),
2663 ("\u{1F1F7}\u{1F1FA}", &["\u{1F1F7}\u{1F1FA}"]),
2664 ("\u{1F1F7}\u{1F1FA}\u{1F1F8}", &["\u{1F1F7}\u{1F1FA}\u{1F1F8}"]),
2665 ("\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}",
2666 &["\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}"]),
2667 ("\u{1F1F7}\u{1F1FA}\u{200B}\u{1F1F8}\u{1F1EA}",
2668 &["\u{1F1F7}\u{1F1FA}", "\u{200B}", "\u{1F1F8}\u{1F1EA}"]),
2669 ("\u{1F1E6}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{1F1E7}\u{1F1E8}"]),
2670 ("\u{1F1E6}\u{200D}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{200D}",
2671 "\u{1F1E7}\u{1F1E8}"]),
2672 ("\u{1F1E6}\u{1F1E7}\u{200D}\u{1F1E8}",
2673 &["\u{1F1E6}\u{1F1E7}\u{200D}", "\u{1F1E8}"]),
2674 ("\u{20}\u{200D}\u{646}", &["\u{20}\u{200D}", "\u{646}"]),
2675 ("\u{646}\u{200D}\u{20}", &["\u{646}\u{200D}", "\u{20}"]),
2678 let test_diff: [(_, &[_], &[_]); 23] = [
2679 ("\u{20}\u{903}", &["\u{20}\u{903}"], &["\u{20}", "\u{903}"]), ("\u{20}\u{308}\u{903}",
2680 &["\u{20}\u{308}\u{903}"], &["\u{20}\u{308}", "\u{903}"]), ("\u{D}\u{308}\u{903}",
2681 &["\u{D}", "\u{308}\u{903}"], &["\u{D}", "\u{308}", "\u{903}"]), ("\u{A}\u{308}\u{903}",
2682 &["\u{A}", "\u{308}\u{903}"], &["\u{A}", "\u{308}", "\u{903}"]), ("\u{1}\u{308}\u{903}",
2683 &["\u{1}", "\u{308}\u{903}"], &["\u{1}", "\u{308}", "\u{903}"]), ("\u{300}\u{903}",
2684 &["\u{300}\u{903}"], &["\u{300}", "\u{903}"]), ("\u{300}\u{308}\u{903}",
2685 &["\u{300}\u{308}\u{903}"], &["\u{300}\u{308}", "\u{903}"]), ("\u{903}\u{903}",
2686 &["\u{903}\u{903}"], &["\u{903}", "\u{903}"]), ("\u{903}\u{308}\u{903}",
2687 &["\u{903}\u{308}\u{903}"], &["\u{903}\u{308}", "\u{903}"]), ("\u{1100}\u{903}",
2688 &["\u{1100}\u{903}"], &["\u{1100}", "\u{903}"]), ("\u{1100}\u{308}\u{903}",
2689 &["\u{1100}\u{308}\u{903}"], &["\u{1100}\u{308}", "\u{903}"]), ("\u{1160}\u{903}",
2690 &["\u{1160}\u{903}"], &["\u{1160}", "\u{903}"]), ("\u{1160}\u{308}\u{903}",
2691 &["\u{1160}\u{308}\u{903}"], &["\u{1160}\u{308}", "\u{903}"]), ("\u{11A8}\u{903}",
2692 &["\u{11A8}\u{903}"], &["\u{11A8}", "\u{903}"]), ("\u{11A8}\u{308}\u{903}",
2693 &["\u{11A8}\u{308}\u{903}"], &["\u{11A8}\u{308}", "\u{903}"]), ("\u{AC00}\u{903}",
2694 &["\u{AC00}\u{903}"], &["\u{AC00}", "\u{903}"]), ("\u{AC00}\u{308}\u{903}",
2695 &["\u{AC00}\u{308}\u{903}"], &["\u{AC00}\u{308}", "\u{903}"]), ("\u{AC01}\u{903}",
2696 &["\u{AC01}\u{903}"], &["\u{AC01}", "\u{903}"]), ("\u{AC01}\u{308}\u{903}",
2697 &["\u{AC01}\u{308}\u{903}"], &["\u{AC01}\u{308}", "\u{903}"]), ("\u{1F1E6}\u{903}",
2698 &["\u{1F1E6}\u{903}"], &["\u{1F1E6}", "\u{903}"]), ("\u{1F1E6}\u{308}\u{903}",
2699 &["\u{1F1E6}\u{308}\u{903}"], &["\u{1F1E6}\u{308}", "\u{903}"]), ("\u{378}\u{903}",
2700 &["\u{378}\u{903}"], &["\u{378}", "\u{903}"]), ("\u{378}\u{308}\u{903}",
2701 &["\u{378}\u{308}\u{903}"], &["\u{378}\u{308}", "\u{903}"]),
2704 for &(s, g) in test_same.iter() {
2705 // test forward iterator
2706 assert!(order::equals(s.graphemes(true), g.iter().map(|&x| x)));
2707 assert!(order::equals(s.graphemes(false), g.iter().map(|&x| x)));
2709 // test reverse iterator
2710 assert!(order::equals(s.graphemes(true).rev(), g.iter().rev().map(|&x| x)));
2711 assert!(order::equals(s.graphemes(false).rev(), g.iter().rev().map(|&x| x)));
2714 for &(s, gt, gf) in test_diff.iter() {
2715 // test forward iterator
2716 assert!(order::equals(s.graphemes(true), gt.iter().map(|&x| x)));
2717 assert!(order::equals(s.graphemes(false), gf.iter().map(|&x| x)));
2719 // test reverse iterator
2720 assert!(order::equals(s.graphemes(true).rev(), gt.iter().rev().map(|&x| x)));
2721 assert!(order::equals(s.graphemes(false).rev(), gf.iter().rev().map(|&x| x)));
2724 // test the indices iterators
2725 let s = "a̐éö̲\r\n";
2726 let gr_inds = s.grapheme_indices(true).collect::<Vec<(uint, &str)>>();
2727 let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
2728 assert_eq!(gr_inds, b);
2729 let gr_inds = s.grapheme_indices(true).rev().collect::<Vec<(uint, &str)>>();
2730 let b: &[_] = &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0u, "a̐")];
2731 assert_eq!(gr_inds, b);
2732 let mut gr_inds_iter = s.grapheme_indices(true);
2734 let gr_inds = gr_inds_iter.by_ref();
2735 let e1 = gr_inds.size_hint();
2736 assert_eq!(e1, (1, Some(13)));
2737 let c = gr_inds.count();
2740 let e2 = gr_inds_iter.size_hint();
2741 assert_eq!(e2, (0, Some(0)));
2743 // make sure the reverse iterator does the right thing with "\n" at beginning of string
2745 let gr = s.graphemes(true).rev().collect::<Vec<&str>>();
2746 let b: &[_] = &["\r", "\r\n", "\n"];
// Exercises `split_str` against a table of inputs: a separator that never
// occurs, separators at the start and/or end of the input, non-ASCII text,
// and inputs made up of repeated separator characters.
2751 fn test_split_strator() {
// Helper: collects the pieces of `s` split on `sep` into a Vec.
// NOTE(review): `u` is presumably the expected list of pieces; the comparison
// against `v` is not visible in this view — confirm.
2752 fn t(s: &str, sep: &str, u: &[&str]) {
2753 let v: Vec<&str> = s.split_str(sep).collect();
// The separator never occurs as a substring: the input is listed as one piece.
2756 t("--1233345--", "12345", &["--1233345--"]);
// Separators in the middle, at the start, at the end, and at both ends;
// a leading or trailing separator is listed with an empty piece next to it.
2757 t("abc::hello::there", "::", &["abc", "hello", "there"]);
2758 t("::hello::there", "::", &["", "hello", "there"]);
2759 t("hello::there::", "::", &["hello", "there", ""]);
2760 t("::hello::there::", "::", &["", "hello", "there", ""]);
// Multi-byte separator inside multi-byte text.
2761 t("ประเทศไทย中华Việt Nam", "中华", &["ประเทศไทย", "Việt Nam"]);
2762 t("zzXXXzzYYYzz", "zz", &["", "XXX", "YYY", ""]);
2763 t("zzXXXzYYYz", "XXX", &["zz", "zYYYz"]);
2764 t(".XXX.YYY.", ".", &["", "XXX", "YYY", ""]);
// Inputs built only from the separator's character: the listed pieces
// correspond to matches taken left to right without overlapping.
2766 t("zz", "zz", &["",""]);
2767 t("ok", "z", &["ok"]);
2768 t("zzz", "zz", &["","z"]);
2769 t("zzzzz", "zz", &["","","z"]);
// Checks that the `Default` value of a `Str` type is the empty string.
2773 fn test_str_default() {
2774 use core::default::Default;
// Generic helper: builds `S`'s default value and asserts its slice is "".
// NOTE(review): the concrete types `t` is instantiated with are not visible
// in this view.
2775 fn t<S: Default + Str>() {
2776 let s: S = Default::default();
2777 assert_eq!(s.as_slice(), "");
// Checks that the lengths of a slice of `&str` add up as expected, both for
// string literals and for slices borrowed from `String` values.
2785 fn test_str_container() {
// Helper: sum of `len()` over every string in `v`.
2786 fn sum_len(v: &[&str]) -> uint {
2787 v.iter().map(|x| x.len()).sum()
2790 let s = String::from_str("01234");
// Each case below splits the five characters "01234" differently (including
// empty pieces), so every sum must be 5.
2791 assert_eq!(5, sum_len(&["012", "", "34"]));
2792 assert_eq!(5, sum_len(&[String::from_str("01").as_slice(),
2793 String::from_str("2").as_slice(),
2794 String::from_str("34").as_slice(),
2795 String::from_str("").as_slice()]));
2796 assert_eq!(5, sum_len(&[s.as_slice()]));
// Checks `from_utf8` on two inputs expected to decode successfully and on a
// byte string ending in 0xFF, which is expected to be rejected.
2800 fn test_str_from_utf8() {
// NOTE(review): the binding of `xs` used by this first assertion is not
// visible in this view.
2802 assert_eq!(from_utf8(xs), Ok("hello"));
// Bytes taken from a multi-byte `&str` must decode back to the same text.
2804 let xs = "ศไทย中华Việt Nam".as_bytes();
2805 assert_eq!(from_utf8(xs), Ok("ศไทย中华Việt Nam"));
// A trailing 0xFF byte is expected to be reported as `Utf8Error::TooShort`.
2807 let xs = b"hello\xFF";
2808 assert_eq!(from_utf8(xs), Err(Utf8Error::TooShort));
2815 use prelude::{SliceExt, IteratorExt, SliceConcatExt};
2817 use test::black_box;
// Benchmark: counts the chars of a string mixing multi-byte and ASCII text.
2820 fn char_iterator(b: &mut Bencher) {
2821 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2823 b.iter(|| s.chars().count());
// Walks `s.chars()` with an explicit `for` loop, handing every char to
// `black_box`.
// NOTE(review): the enclosing `b.iter(..)` call is not visible in this view.
2827 fn char_iterator_for(b: &mut Bencher) {
2828 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2831 for ch in s.chars() { black_box(ch) }
// Benchmark: counts the chars of a multi-line string literal that contains
// only ASCII text.
2836 fn char_iterator_ascii(b: &mut Bencher) {
2837 let s = "Mary had a little lamb, Little lamb
2838 Mary had a little lamb, Little lamb
2839 Mary had a little lamb, Little lamb
2840 Mary had a little lamb, Little lamb
2841 Mary had a little lamb, Little lamb
2842 Mary had a little lamb, Little lamb";
2844 b.iter(|| s.chars().count());
// Benchmark: counts the chars of a mixed multi-byte/ASCII string while
// iterating from the back (`chars().rev()`).
2848 fn char_iterator_rev(b: &mut Bencher) {
2849 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2851 b.iter(|| s.chars().rev().count());
// Walks `s.chars().rev()` with an explicit `for` loop, handing every char to
// `black_box`.
// NOTE(review): the enclosing `b.iter(..)` call is not visible in this view.
2855 fn char_iterator_rev_for(b: &mut Bencher) {
2856 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2859 for ch in s.chars().rev() { black_box(ch) }
// Benchmark: counts the items of `char_indices()` and asserts the count
// matches the number of chars.
2864 fn char_indicesator(b: &mut Bencher) {
2865 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Reference count, computed once outside the timed closure.
2866 let len = s.chars().count();
2868 b.iter(|| assert_eq!(s.char_indices().count(), len));
// Benchmark: counts the items of `char_indices().rev()` and asserts the count
// matches the number of chars.
2872 fn char_indicesator_rev(b: &mut Bencher) {
2873 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Reference count, computed once outside the timed closure.
2874 let len = s.chars().count();
2876 b.iter(|| assert_eq!(s.char_indices().rev().count(), len));
// Benchmark: splits a string containing multi-byte text on the ASCII char
// 'V'; 'V' occurs twice, so three pieces are expected.
2880 fn split_unicode_ascii(b: &mut Bencher) {
2881 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2883 b.iter(|| assert_eq!(s.split('V').count(), 3));
// Benchmark: same split as on a plain 'V', but through a `CharEq` wrapper
// whose `only_ascii()` returns false.
// NOTE(review): presumably this steers `split` away from an ASCII-only fast
// path — confirm against the `split` implementation.
2887 fn split_unicode_not_ascii(b: &mut Bencher) {
// Wrapper around a single char used as the split pattern.
2888 struct NotAscii(char);
2889 impl CharEq for NotAscii {
// NOTE(review): the comparison of `cc` against `c` is not visible in this view.
2890 fn matches(&mut self, c: char) -> bool {
2891 let NotAscii(cc) = *self;
2894 fn only_ascii(&self) -> bool { false }
2896 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2898 b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
// Benchmark: splits an ASCII sentence on ' ' and asserts the piece count
// stays equal to a count taken once up front.
2903 fn split_ascii(b: &mut Bencher) {
2904 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference count, computed once outside the timed closure.
2905 let len = s.split(' ').count();
2907 b.iter(|| assert_eq!(s.split(' ').count(), len));
// Benchmark: splits an ASCII sentence on ' ' through a `CharEq` wrapper whose
// `only_ascii()` returns false, and asserts the piece count matches the one
// obtained with the plain char pattern.
2911 fn split_not_ascii(b: &mut Bencher) {
// Wrapper around a single char used as the split pattern.
2912 struct NotAscii(char);
2913 impl CharEq for NotAscii {
// NOTE(review): the comparison of `cc` against `c` is not visible in this view.
2915 fn matches(&mut self, c: char) -> bool {
2916 let NotAscii(cc) = *self;
2919 fn only_ascii(&self) -> bool { false }
2921 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference count taken with the plain `' '` pattern.
2922 let len = s.split(' ').count();
2924 b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
// Benchmark: splits an ASCII sentence using a named predicate function as the
// pattern, and asserts the piece count matches splitting on ' ' directly.
2928 fn split_extern_fn(b: &mut Bencher) {
2929 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference count taken with the plain `' '` pattern.
2930 let len = s.split(' ').count();
// Predicate equivalent to the `' '` pattern.
2931 fn pred(c: char) -> bool { c == ' ' }
2933 b.iter(|| assert_eq!(s.split(pred).count(), len));
// Benchmark: splits an ASCII sentence using a closure as the pattern, and
// asserts the piece count matches splitting on ' ' directly.
2937 fn split_closure(b: &mut Bencher) {
2938 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference count taken with the plain `' '` pattern.
2939 let len = s.split(' ').count();
2941 b.iter(|| assert_eq!(s.split(|&: c: char| c == ' ').count(), len));
// Benchmark: splits an ASCII sentence using a one-element `&[char]` slice as
// the pattern, and asserts the piece count matches splitting on ' ' directly.
2945 fn split_slice(b: &mut Bencher) {
2946 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference count taken with the plain `' '` pattern.
2947 let len = s.split(' ').count();
2949 let c: &[char] = &[' '];
2950 b.iter(|| assert_eq!(s.split(c).count(), len));
// Joins ten copies of `s` with `sep` via `connect` and checks the result's
// length: ten strings plus nine separators.
// NOTE(review): the binding of `sep` and the enclosing `b.iter(..)` call are
// not visible in this view.
2954 fn bench_connect(b: &mut Bencher) {
2955 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2957 let v = vec![s, s, s, s, s, s, s, s, s, s];
2959 assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
// Asserts that a one-sentence haystack contains `needle`.
// NOTE(review): the binding of `needle` and the enclosing `b.iter(..)` call
// are not visible in this view.
2964 fn bench_contains_short_short(b: &mut Bencher) {
2965 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2969 assert!(haystack.contains(needle));
// Searches a multi-paragraph Latin filler text for the short needle
// "english" and asserts it is NOT found.
// NOTE(review): the opening of the `haystack` string literal and the
// enclosing `b.iter(..)` call are not visible in this view.
2974 fn bench_contains_short_long(b: &mut Bencher) {
2976 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
2977 ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
2978 eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
2979 sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
2980 tempus vel, gravida nec quam.
2982 In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
2983 sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
2984 diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
2985 lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
2986 eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
2987 interdum. Curabitur ut nisi justo.
2989 Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
2990 mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
2991 lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
2992 est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
2993 felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
2994 ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
2995 feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
2996 Aliquam sit amet placerat lorem.
2998 Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
2999 mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
3000 Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
3001 lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
3002 suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
3005 Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
3006 feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
3007 vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
3008 leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
3009 malesuada sollicitudin quam eu fermentum.";
3010 let needle = "english";
// Negative search: the assertion requires that the needle is absent.
3013 assert!(!haystack.contains(needle));
// Searches a haystack made only of 'a' for a needle that is a run of 'a'
// ending in 'b', and asserts it is NOT found.
// NOTE(review): the name suggests this is meant as a bad case for a naive
// substring search; the enclosing `b.iter(..)` call is not visible here.
3018 fn bench_contains_bad_naive(b: &mut Bencher) {
3019 let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
3020 let needle = "aaaaaaaab";
3023 assert!(!haystack.contains(needle));
// Searches a haystack for a needle that is the very same sentence and asserts
// it is found.
// NOTE(review): the enclosing `b.iter(..)` call is not visible in this view.
3028 fn bench_contains_equal(b: &mut Bencher) {
3029 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3030 let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3033 assert!(haystack.contains(needle));