1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 // ignore-lexer-test FIXME #15679
13 //! Unicode string manipulation (`str` type)
17 //! Rust's string type is one of the core primitive types of the language. While
18 //! represented by the name `str`, the name `str` is not actually a valid type in
19 //! Rust. Each string must also be decorated with a pointer. `String` is used
20 //! for an owned string, so there is only one commonly-used `str` type in Rust:
23 //! `&str` is the borrowed string type. This type of string can only be created
24 //! from other strings, unless it is a static string (see below). As the word
25 //! "borrowed" implies, this type of string is owned elsewhere, and this string
26 //! cannot be moved out of.
28 //! As an example, here's some code that uses a string.
32 //! let borrowed_string = "This string is borrowed with the 'static lifetime";
36 //! From the example above, you can guess that Rust's string literals have the
37 //! `'static` lifetime. This is akin to C's concept of a static string.
38 //! More precisely, string literals are immutable views with a 'static lifetime
39 //! (otherwise known as the lifetime of the entire program), and thus have the
40 //! type `&'static str`.
44 //! Rust's string type, `str`, is a sequence of Unicode scalar values encoded as a
45 //! stream of UTF-8 bytes. All [strings](../../reference.html#literals) are
46 //! guaranteed to be validly encoded UTF-8 sequences. Additionally, strings are
47 //! not null-terminated and can thus contain null bytes.
49 //! The actual representation of strings has a direct mapping to slices: `&str`
50 //! is the same as `&[u8]`.
52 #![doc(primitive = "str")]
55 use self::RecompositionState::*;
56 use self::DecompositionType::*;
58 use core::borrow::{BorrowFrom, ToOwned};
59 use core::char::CharExt;
60 use core::clone::Clone;
61 use core::iter::AdditiveIterator;
62 use core::iter::{range, Iterator, IteratorExt};
63 use core::ops::{FullRange, Index};
64 use core::option::Option::{self, Some, None};
65 use core::slice::AsSlice;
66 use core::str as core_str;
67 use unicode::str::{UnicodeStr, Utf16Encoder};
69 use ring_buf::RingBuf;
74 use slice::SliceConcatExt;
76 pub use core::str::{FromStr, Utf8Error, Str};
77 pub use core::str::{Lines, LinesAny, MatchIndices, SplitStr, CharRange};
78 pub use core::str::{Split, SplitTerminator};
79 pub use core::str::{SplitN, RSplitN};
80 pub use core::str::{from_utf8, CharEq, Chars, CharIndices, Bytes};
81 pub use core::str::{from_utf8_unchecked, from_c_str};
82 pub use unicode::str::{Words, Graphemes, GraphemeIndices};
85 Section: Creating a string
88 impl<S: Str> SliceConcatExt<str, String> for [S] {
89 fn concat(&self) -> String {
90 let s = self.as_slice();
96 // `len` calculation may overflow but push_str will check boundaries
97 let len = s.iter().map(|s| s.as_slice().len()).sum();
98 let mut result = String::with_capacity(len);
101 result.push_str(s.as_slice())
107 fn connect(&self, sep: &str) -> String {
108 let s = self.as_slice();
111 return String::new();
119 // this is wrong without the guarantee that `self` is non-empty
120 // `len` calculation may overflow but push_str will check boundaries
121 let len = sep.len() * (s.len() - 1)
122 + s.iter().map(|s| s.as_slice().len()).sum();
123 let mut result = String::with_capacity(len);
124 let mut first = true;
130 result.push_str(sep);
132 result.push_str(s.as_slice());
142 // Helper functions used for Unicode normalization
/// Makes repeated passes over adjacent entries of `comb`, a slice of
/// `(char, combining class)` pairs, looking for neighbours that are out of
/// order.
///
/// Two neighbours count as out of order only when both classes are non-zero
/// and the left class is greater than the right one, so an entry whose class
/// is 0 never triggers a reordering.
///
/// NOTE(review): the statements that react to an out-of-order pair are not
/// visible in this excerpt; presumably they swap the pair and set `swapped`.
143 fn canonical_sort(comb: &mut [(char, u8)]) {
144 let len = comb.len();
145 for i in range(0, len) {
// Records whether the current pass found anything out of order.
146 let mut swapped = false;
// Each pass covers `i` fewer trailing elements than the first one did.
147 for j in range(1, len-i) {
148 let class_a = comb[j-1].1;
149 let class_b = comb[j].1;
150 if class_a != 0 && class_b != 0 && class_a > class_b {
// A pass that found nothing out of order ends the outer loop early.
155 if !swapped { break; }
160 enum DecompositionType {
165 /// External iterator for a string's decomposition's characters.
166 /// Use with the `std::iter` module.
169 pub struct Decompositions<'a> {
170 kind: DecompositionType,
172 buffer: Vec<(char, u8)>,
177 impl<'a> Iterator for Decompositions<'a> {
181 fn next(&mut self) -> Option<char> {
182 match self.buffer.first() {
185 self.buffer.remove(0);
188 Some(&(c, _)) if self.sorted => {
189 self.buffer.remove(0);
192 _ => self.sorted = false
196 for ch in self.iter {
197 let buffer = &mut self.buffer;
198 let sorted = &mut self.sorted;
200 let callback = |&mut: d| {
202 unicode::char::canonical_combining_class(d);
203 if class == 0 && !*sorted {
204 canonical_sort(buffer.as_mut_slice());
207 buffer.push((d, class));
211 unicode::char::decompose_canonical(ch, callback)
214 unicode::char::decompose_compatible(ch, callback)
225 canonical_sort(self.buffer.as_mut_slice());
229 if self.buffer.is_empty() {
232 match self.buffer.remove(0) {
242 fn size_hint(&self) -> (uint, Option<uint>) {
243 let (lower, _) = self.iter.size_hint();
249 enum RecompositionState {
255 /// External iterator for a string's recomposition's characters.
256 /// Use with the `std::iter` module.
259 pub struct Recompositions<'a> {
260 iter: Decompositions<'a>,
261 state: RecompositionState,
262 buffer: RingBuf<char>,
263 composee: Option<char>,
268 impl<'a> Iterator for Recompositions<'a> {
272 fn next(&mut self) -> Option<char> {
276 for ch in self.iter {
277 let ch_class = unicode::char::canonical_combining_class(ch);
278 if self.composee.is_none() {
282 self.composee = Some(ch);
285 let k = self.composee.clone().unwrap();
287 match self.last_ccc {
289 match unicode::char::compose(k, ch) {
291 self.composee = Some(r);
296 self.composee = Some(ch);
299 self.buffer.push_back(ch);
300 self.last_ccc = Some(ch_class);
305 if l_class >= ch_class {
306 // `ch` is blocked from `composee`
308 self.composee = Some(ch);
309 self.last_ccc = None;
310 self.state = Purging;
313 self.buffer.push_back(ch);
314 self.last_ccc = Some(ch_class);
317 match unicode::char::compose(k, ch) {
319 self.composee = Some(r);
323 self.buffer.push_back(ch);
324 self.last_ccc = Some(ch_class);
330 self.state = Finished;
331 if self.composee.is_some() {
332 return self.composee.take();
336 match self.buffer.pop_front() {
337 None => self.state = Composing,
342 match self.buffer.pop_front() {
343 None => return self.composee.take(),
352 /// External iterator for a string's UTF16 codeunits.
353 /// Use with the `std::iter` module.
356 pub struct Utf16Units<'a> {
357 encoder: Utf16Encoder<Chars<'a>>
361 impl<'a> Iterator for Utf16Units<'a> {
/// Yields the next `u16` by delegating to the wrapped `Utf16Encoder`.
365 fn next(&mut self) -> Option<u16> { self.encoder.next() }
/// Forwards the wrapped encoder's size hint unchanged.
368 fn size_hint(&self) -> (uint, Option<uint>) { self.encoder.size_hint() }
375 // Return the initial codepoint accumulator for the first byte.
376 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
377 // for width 3, and 3 bits for width 4
378 macro_rules! utf8_first_byte {
379 ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
382 // return the value of $ch updated with continuation byte $byte
383 macro_rules! utf8_acc_cont_byte {
384 ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
387 #[unstable = "trait is unstable"]
388 impl BorrowFrom<String> for str {
389 fn borrow_from(owned: &String) -> &str { &owned[] }
392 #[unstable = "trait is unstable"]
393 impl ToOwned<String> for str {
/// Builds an owned `String` from a copy of this string's bytes.
394 fn to_owned(&self) -> String {
// The bytes are copied from an existing `str`, which the module
// documentation describes as always being valid UTF-8, so the unchecked
// constructor is used instead of re-validating them.
396 String::from_utf8_unchecked(self.as_bytes().to_owned())
406 Section: Trait implementations
409 /// Any string that can be represented as a slice.
411 pub trait StrExt: Index<FullRange, Output = str> {
412 /// Escapes each char in `s` with `char::escape_default`.
413 #[unstable = "return type may change to be an iterator"]
414 fn escape_default(&self) -> String {
415 self.chars().flat_map(|c| c.escape_default()).collect()
418 /// Escapes each char in `s` with `char::escape_unicode`.
419 #[unstable = "return type may change to be an iterator"]
420 fn escape_unicode(&self) -> String {
421 self.chars().flat_map(|c| c.escape_unicode()).collect()
424 /// Replaces all occurrences of one string with another.
428 /// * `from` - The string to replace
429 /// * `to` - The replacement string
433 /// The original string with all occurrences of `from` replaced with `to`.
438 /// let s = "Do you know the muffin man,
439 /// The muffin man, the muffin man, ...".to_string();
441 /// assert_eq!(s.replace("muffin man", "little lamb"),
442 /// "Do you know the little lamb,
443 /// The little lamb, the little lamb, ...".to_string());
445 /// // not found, so no change.
446 /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
449 fn replace(&self, from: &str, to: &str) -> String {
450 let mut result = String::new();
// Byte offset where the part of `self` not yet copied into `result` begins.
451 let mut last_end = 0;
452 for (start, end) in self.match_indices(from) {
// Copy the text lying between `last_end` and the start of this match.
// NOTE(review): presumably sound because both offsets originate from
// `match_indices` on the same string — confirm.
453 result.push_str(unsafe { self.slice_unchecked(last_end, start) });
// Append whatever follows `last_end`, up to the end of the string.
457 result.push_str(unsafe { self.slice_unchecked(last_end, self.len()) });
461 /// Returns an iterator over the string in Unicode Normalization Form D
462 /// (canonical decomposition).
464 #[unstable = "this functionality may be moved to libunicode"]
465 fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
467 iter: self[].chars(),
474 /// Returns an iterator over the string in Unicode Normalization Form KD
475 /// (compatibility decomposition).
477 #[unstable = "this functionality may be moved to libunicode"]
478 fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
480 iter: self[].chars(),
487 /// An Iterator over the string in Unicode Normalization Form C
488 /// (canonical decomposition followed by canonical composition).
490 #[unstable = "this functionality may be moved to libunicode"]
491 fn nfc_chars<'a>(&'a self) -> Recompositions<'a> {
493 iter: self.nfd_chars(),
495 buffer: RingBuf::new(),
501 /// An Iterator over the string in Unicode Normalization Form KC
502 /// (compatibility decomposition followed by canonical composition).
504 #[unstable = "this functionality may be moved to libunicode"]
505 fn nfkc_chars<'a>(&'a self) -> Recompositions<'a> {
507 iter: self.nfkd_chars(),
509 buffer: RingBuf::new(),
515 /// Returns true if a string contains a string pattern.
519 /// - pat - The string pattern to look for
524 /// assert!("bananas".contains("nana"));
527 fn contains(&self, pat: &str) -> bool {
528 core_str::StrExt::contains(&self[], pat)
531 /// Returns true if a string contains a char pattern.
535 /// - pat - The char pattern to look for
540 /// assert!("hello".contains_char('e'));
542 #[unstable = "might get removed in favour of a more generic contains()"]
543 fn contains_char<P: CharEq>(&self, pat: P) -> bool {
544 core_str::StrExt::contains_char(&self[], pat)
547 /// An iterator over the characters of `self`. Note, this iterates
548 /// over Unicode code-points, not Unicode graphemes.
553 /// let v: Vec<char> = "abc åäö".chars().collect();
554 /// assert_eq!(v, vec!['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
557 fn chars(&self) -> Chars {
558 core_str::StrExt::chars(&self[])
561 /// An iterator over the bytes of `self`
566 /// let v: Vec<u8> = "bors".bytes().collect();
567 /// assert_eq!(v, b"bors".to_vec());
570 fn bytes(&self) -> Bytes {
571 core_str::StrExt::bytes(&self[])
574 /// An iterator over the characters of `self` and their byte offsets.
576 fn char_indices(&self) -> CharIndices {
577 core_str::StrExt::char_indices(&self[])
580 /// An iterator over substrings of `self`, separated by characters
581 /// matched by the pattern `pat`.
586 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect();
587 /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
589 /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).collect();
590 /// assert_eq!(v, vec!["abc", "def", "ghi"]);
592 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
593 /// assert_eq!(v, vec!["lion", "", "tiger", "leopard"]);
595 /// let v: Vec<&str> = "".split('X').collect();
596 /// assert_eq!(v, vec![""]);
599 fn split<P: CharEq>(&self, pat: P) -> Split<P> {
600 core_str::StrExt::split(&self[], pat)
603 /// An iterator over substrings of `self`, separated by characters
604 /// matched by the pattern `pat`, restricted to splitting at most `count`
610 /// let v: Vec<&str> = "Mary had a little lambda".splitn(2, ' ').collect();
611 /// assert_eq!(v, vec!["Mary", "had", "a little lambda"]);
613 /// let v: Vec<&str> = "abc1def2ghi".splitn(1, |&: c: char| c.is_numeric()).collect();
614 /// assert_eq!(v, vec!["abc", "def2ghi"]);
616 /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(2, 'X').collect();
617 /// assert_eq!(v, vec!["lion", "", "tigerXleopard"]);
619 /// let v: Vec<&str> = "abcXdef".splitn(0, 'X').collect();
620 /// assert_eq!(v, vec!["abcXdef"]);
622 /// let v: Vec<&str> = "".splitn(1, 'X').collect();
623 /// assert_eq!(v, vec![""]);
626 fn splitn<P: CharEq>(&self, count: uint, pat: P) -> SplitN<P> {
627 core_str::StrExt::splitn(&self[], count, pat)
630 /// An iterator over substrings of `self`, separated by characters
631 /// matched by the pattern `pat`.
633 /// Equivalent to `split`, except that the trailing substring
634 /// is skipped if empty (terminator semantics).
639 /// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
640 /// assert_eq!(v, vec!["A", "B"]);
642 /// let v: Vec<&str> = "A..B..".split_terminator('.').collect();
643 /// assert_eq!(v, vec!["A", "", "B", ""]);
645 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').rev().collect();
646 /// assert_eq!(v, vec!["lamb", "little", "a", "had", "Mary"]);
648 /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).rev().collect();
649 /// assert_eq!(v, vec!["ghi", "def", "abc"]);
651 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').rev().collect();
652 /// assert_eq!(v, vec!["leopard", "tiger", "", "lion"]);
654 #[unstable = "might get removed"]
655 fn split_terminator<P: CharEq>(&self, pat: P) -> SplitTerminator<P> {
656 core_str::StrExt::split_terminator(&self[], pat)
659 /// An iterator over substrings of `self`, separated by characters
660 /// matched by the pattern `pat`, starting from the end of the string.
661 /// Restricted to splitting at most `count` times.
666 /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(2, ' ').collect();
667 /// assert_eq!(v, vec!["lamb", "little", "Mary had a"]);
669 /// let v: Vec<&str> = "abc1def2ghi".rsplitn(1, |&: c: char| c.is_numeric()).collect();
670 /// assert_eq!(v, vec!["ghi", "abc1def"]);
672 /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(2, 'X').collect();
673 /// assert_eq!(v, vec!["leopard", "tiger", "lionX"]);
676 fn rsplitn<P: CharEq>(&self, count: uint, pat: P) -> RSplitN<P> {
677 core_str::StrExt::rsplitn(&self[], count, pat)
680 /// An iterator over the start and end indices of the disjoint
681 /// matches of the pattern `pat` within `self`.
683 /// That is, each returned value `(start, end)` satisfies
684 /// `self.slice(start, end) == pat`. For matches of `pat` within
685 /// `self` that overlap, only the indices corresponding to the
686 /// first match are returned.
691 /// let v: Vec<(uint, uint)> = "abcXXXabcYYYabc".match_indices("abc").collect();
692 /// assert_eq!(v, vec![(0,3), (6,9), (12,15)]);
694 /// let v: Vec<(uint, uint)> = "1abcabc2".match_indices("abc").collect();
695 /// assert_eq!(v, vec![(1,4), (4,7)]);
697 /// let v: Vec<(uint, uint)> = "ababa".match_indices("aba").collect();
698 /// assert_eq!(v, vec![(0, 3)]); // only the first `aba`
700 #[unstable = "might have its iterator type changed"]
701 fn match_indices<'a>(&'a self, pat: &'a str) -> MatchIndices<'a> {
702 core_str::StrExt::match_indices(&self[], pat)
705 /// An iterator over the substrings of `self` separated by the pattern `pat`.
710 /// let v: Vec<&str> = "abcXXXabcYYYabc".split_str("abc").collect();
711 /// assert_eq!(v, vec!["", "XXX", "YYY", ""]);
713 /// let v: Vec<&str> = "1abcabc2".split_str("abc").collect();
714 /// assert_eq!(v, vec!["1", "", "2"]);
716 #[unstable = "might get removed in the future in favor of a more generic split()"]
717 fn split_str<'a>(&'a self, pat: &'a str) -> SplitStr<'a> {
718 core_str::StrExt::split_str(&self[], pat)
721 /// An iterator over the lines of a string (subsequences separated
722 /// by `\n`). This does not include the empty string after a
728 /// let four_lines = "foo\nbar\n\nbaz\n";
729 /// let v: Vec<&str> = four_lines.lines().collect();
730 /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
733 fn lines(&self) -> Lines {
734 core_str::StrExt::lines(&self[])
737 /// An iterator over the lines of a string, separated by either
738 /// `\n` or `\r\n`. As with `.lines()`, this does not include an
739 /// empty trailing line.
744 /// let four_lines = "foo\r\nbar\n\r\nbaz\n";
745 /// let v: Vec<&str> = four_lines.lines_any().collect();
746 /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
749 fn lines_any(&self) -> LinesAny {
750 core_str::StrExt::lines_any(&self[])
753 /// Returns a slice of the given string from the byte range
754 /// [`begin`..`end`).
756 /// This operation is `O(1)`.
758 /// Panics when `begin` and `end` do not point to valid characters
759 /// or point beyond the last character of the string.
761 /// See also `slice_to` and `slice_from` for slicing prefixes and
762 /// suffixes of strings, and `slice_chars` for slicing based on
763 /// code point counts.
768 /// let s = "Löwe 老虎 Léopard";
769 /// assert_eq!(s.slice(0, 1), "L");
771 /// assert_eq!(s.slice(1, 9), "öwe 老");
773 /// // these will panic:
774 /// // byte 2 lies within `ö`:
775 /// // s.slice(2, 3);
777 /// // byte 8 lies within `老`
778 /// // s.slice(1, 8);
780 /// // byte 100 is outside the string
781 /// // s.slice(3, 100);
783 #[unstable = "use slice notation [a..b] instead"]
784 fn slice(&self, begin: uint, end: uint) -> &str {
785 core_str::StrExt::slice(&self[], begin, end)
788 /// Returns a slice of the string from `begin` to its end.
790 /// Equivalent to `self.slice(begin, self.len())`.
792 /// Panics when `begin` does not point to a valid character, or is
795 /// See also `slice`, `slice_to` and `slice_chars`.
796 #[unstable = "use slice notation [a..] instead"]
797 fn slice_from(&self, begin: uint) -> &str {
798 core_str::StrExt::slice_from(&self[], begin)
801 /// Returns a slice of the string from the beginning to byte
804 /// Equivalent to `self.slice(0, end)`.
806 /// Panics when `end` does not point to a valid character, or is
809 /// See also `slice`, `slice_from` and `slice_chars`.
810 #[unstable = "use slice notation [..a] instead"]
811 fn slice_to(&self, end: uint) -> &str {
812 core_str::StrExt::slice_to(&self[], end)
815 /// Returns a slice of the string from the character range
816 /// [`begin`..`end`).
818 /// That is, start at the `begin`-th code point of the string and
819 /// continue to the `end`-th code point. This does not detect or
820 /// handle edge cases such as leaving a combining character as the
821 /// first code point of the string.
823 /// Due to the design of UTF-8, this operation is `O(end)`.
824 /// See `slice`, `slice_to` and `slice_from` for `O(1)`
825 /// variants that use byte indices rather than code point
828 /// Panics if `begin` > `end` or if either `begin` or `end` is
829 /// beyond the last character of the string.
834 /// let s = "Löwe 老虎 Léopard";
835 /// assert_eq!(s.slice_chars(0, 4), "Löwe");
836 /// assert_eq!(s.slice_chars(5, 7), "老虎");
838 #[unstable = "may have yet to prove its worth"]
839 fn slice_chars(&self, begin: uint, end: uint) -> &str {
840 core_str::StrExt::slice_chars(&self[], begin, end)
843 /// Takes a bytewise (not UTF-8) slice from a string.
845 /// Returns the substring from [`begin`..`end`).
847 /// Caller must check both UTF-8 character boundaries and the boundaries of
848 /// the entire slice as well.
850 unsafe fn slice_unchecked(&self, begin: uint, end: uint) -> &str {
851 core_str::StrExt::slice_unchecked(&self[], begin, end)
854 /// Returns true if the pattern `pat` is a prefix of the string.
859 /// assert!("banana".starts_with("ba"));
862 fn starts_with(&self, pat: &str) -> bool {
863 core_str::StrExt::starts_with(&self[], pat)
866 /// Returns true if the pattern `pat` is a suffix of the string.
871 /// assert!("banana".ends_with("nana"));
874 fn ends_with(&self, pat: &str) -> bool {
875 core_str::StrExt::ends_with(&self[], pat)
878 /// Returns a string with all pre- and suffixes that match
879 /// the pattern `pat` repeatedly removed.
883 /// * pat - a string pattern
888 /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
889 /// let x: &[_] = &['1', '2'];
890 /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar");
891 /// assert_eq!("123foo1bar123".trim_matches(|&: c: char| c.is_numeric()), "foo1bar");
894 fn trim_matches<P: CharEq>(&self, pat: P) -> &str {
895 core_str::StrExt::trim_matches(&self[], pat)
898 /// Returns a string with all prefixes that match
899 /// the pattern `pat` repeatedly removed.
903 /// * pat - a string pattern
908 /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
909 /// let x: &[_] = &['1', '2'];
910 /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12");
911 /// assert_eq!("123foo1bar123".trim_left_matches(|&: c: char| c.is_numeric()), "foo1bar123");
914 fn trim_left_matches<P: CharEq>(&self, pat: P) -> &str {
915 core_str::StrExt::trim_left_matches(&self[], pat)
918 /// Returns a string with all suffixes that match
919 /// the pattern `pat` repeatedly removed.
923 /// * pat - a string pattern
928 /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
929 /// let x: &[_] = &['1', '2'];
930 /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar");
931 /// assert_eq!("123foo1bar123".trim_right_matches(|&: c: char| c.is_numeric()), "123foo1bar");
934 fn trim_right_matches<P: CharEq>(&self, pat: P) -> &str {
935 core_str::StrExt::trim_right_matches(&self[], pat)
938 /// Check that `index`-th byte lies at the start and/or end of a
939 /// UTF-8 code point sequence.
941 /// The start and end of the string (when `index == self.len()`)
942 /// are considered to be boundaries.
944 /// Panics if `index` is greater than `self.len()`.
949 /// let s = "Löwe 老虎 Léopard";
950 /// assert!(s.is_char_boundary(0));
952 /// assert!(s.is_char_boundary(6));
953 /// assert!(s.is_char_boundary(s.len()));
955 /// // second byte of `ö`
956 /// assert!(!s.is_char_boundary(2));
958 /// // third byte of `老`
959 /// assert!(!s.is_char_boundary(8));
961 #[unstable = "naming is uncertain with container conventions"]
962 fn is_char_boundary(&self, index: uint) -> bool {
963 core_str::StrExt::is_char_boundary(&self[], index)
966 /// Pluck a character out of a string and return the index of the next
969 /// This function can be used to iterate over the Unicode characters of a
974 /// This example manually iterates through the characters of a
975 /// string; this should normally be done by `.chars()` or
979 /// use std::str::CharRange;
981 /// let s = "中华Việt Nam";
983 /// while i < s.len() {
984 /// let CharRange {ch, next} = s.char_range_at(i);
985 /// println!("{}: {}", i, ch);
1007 /// * s - The string
1008 /// * i - The byte offset of the char to extract
1012 /// A record {ch: char, next: uint} containing the char value and the byte
1013 /// index of the next Unicode character.
1017 /// If `i` is greater than or equal to the length of the string.
1018 /// If `i` is not the index of the beginning of a valid UTF-8 character.
1019 #[unstable = "naming is uncertain with container conventions"]
1020 fn char_range_at(&self, start: uint) -> CharRange {
1021 core_str::StrExt::char_range_at(&self[], start)
1024 /// Given a byte position and a str, return the previous char and its position.
1026 /// This function can be used to iterate over a Unicode string in reverse.
1028 /// Returns 0 for next index if called on start index 0.
1032 /// If `i` is greater than the length of the string.
1033 /// If `i` is not an index following a valid UTF-8 character.
1034 #[unstable = "naming is uncertain with container conventions"]
1035 fn char_range_at_reverse(&self, start: uint) -> CharRange {
1036 core_str::StrExt::char_range_at_reverse(&self[], start)
1039 /// Plucks the character starting at the `i`th byte of a string.
1045 /// assert_eq!(s.char_at(1), 'b');
1046 /// assert_eq!(s.char_at(2), 'π');
1047 /// assert_eq!(s.char_at(4), 'c');
1052 /// If `i` is greater than or equal to the length of the string.
1053 /// If `i` is not the index of the beginning of a valid UTF-8 character.
1054 #[unstable = "naming is uncertain with container conventions"]
1055 fn char_at(&self, i: uint) -> char {
1056 core_str::StrExt::char_at(&self[], i)
1059 /// Plucks the character ending at the `i`th byte of a string.
1063 /// If `i` is greater than the length of the string.
1064 /// If `i` is not an index following a valid UTF-8 character.
1065 #[unstable = "naming is uncertain with container conventions"]
1066 fn char_at_reverse(&self, i: uint) -> char {
1067 core_str::StrExt::char_at_reverse(&self[], i)
1070 /// Work with the byte buffer of a string as a byte slice.
1075 /// assert_eq!("bors".as_bytes(), b"bors");
1078 fn as_bytes(&self) -> &[u8] {
1079 core_str::StrExt::as_bytes(&self[])
1082 /// Returns the byte index of the first character of `self` that
1083 /// matches the pattern `pat`.
1087 /// `Some` containing the byte index of the first matching character
1088 /// or `None` if there is no match
1093 /// let s = "Löwe 老虎 Léopard";
1095 /// assert_eq!(s.find('L'), Some(0));
1096 /// assert_eq!(s.find('é'), Some(14));
1098 /// // the first space
1099 /// assert_eq!(s.find(|&: c: char| c.is_whitespace()), Some(5));
1101 /// // neither are found
1102 /// let x: &[_] = &['1', '2'];
1103 /// assert_eq!(s.find(x), None);
1106 fn find<P: CharEq>(&self, pat: P) -> Option<uint> {
1107 core_str::StrExt::find(&self[], pat)
1110 /// Returns the byte index of the last character of `self` that
1111 /// matches the pattern `pat`.
1115 /// `Some` containing the byte index of the last matching character
1116 /// or `None` if there is no match.
1121 /// let s = "Löwe 老虎 Léopard";
1123 /// assert_eq!(s.rfind('L'), Some(13));
1124 /// assert_eq!(s.rfind('é'), Some(14));
1126 /// // the second space
1127 /// assert_eq!(s.rfind(|&: c: char| c.is_whitespace()), Some(12));
1129 /// // searches for an occurrence of either `1` or `2`, but neither are found
1130 /// let x: &[_] = &['1', '2'];
1131 /// assert_eq!(s.rfind(x), None);
1134 fn rfind<P: CharEq>(&self, pat: P) -> Option<uint> {
1135 core_str::StrExt::rfind(&self[], pat)
1138 /// Returns the byte index of the first matching substring
1142 /// * `needle` - The string to search for
1146 /// `Some` containing the byte index of the first matching substring
1147 /// or `None` if there is no match.
1152 /// let s = "Löwe 老虎 Léopard";
1154 /// assert_eq!(s.find_str("老虎 L"), Some(6));
1155 /// assert_eq!(s.find_str("muffin man"), None);
1157 #[unstable = "might get removed in favor of a more generic find in the future"]
1158 fn find_str(&self, needle: &str) -> Option<uint> {
1159 core_str::StrExt::find_str(&self[], needle)
1162 /// Retrieves the first character from a string slice and returns
1163 /// it. This does not allocate a new string; instead, it returns a
1164 /// slice that points one character beyond the character that was
1165 /// shifted. If the string does not contain any characters,
1166 /// None is returned instead.
1171 /// let s = "Löwe 老虎 Léopard";
1172 /// let (c, s1) = s.slice_shift_char().unwrap();
1173 /// assert_eq!(c, 'L');
1174 /// assert_eq!(s1, "öwe 老虎 Léopard");
1176 /// let (c, s2) = s1.slice_shift_char().unwrap();
1177 /// assert_eq!(c, 'ö');
1178 /// assert_eq!(s2, "we 老虎 Léopard");
1180 #[unstable = "awaiting conventions about shifting and slices"]
1181 fn slice_shift_char(&self) -> Option<(char, &str)> {
1182 core_str::StrExt::slice_shift_char(&self[])
1185 /// Returns the byte offset of an inner slice relative to an enclosing outer slice.
1187 /// Panics if `inner` is not a direct slice contained within self.
1192 /// let string = "a\nb\nc";
1193 /// let lines: Vec<&str> = string.lines().collect();
1195 /// assert!(string.subslice_offset(lines[0]) == 0); // &"a"
1196 /// assert!(string.subslice_offset(lines[1]) == 2); // &"b"
1197 /// assert!(string.subslice_offset(lines[2]) == 4); // &"c"
1199 #[unstable = "awaiting convention about comparability of arbitrary slices"]
1200 fn subslice_offset(&self, inner: &str) -> uint {
1201 core_str::StrExt::subslice_offset(&self[], inner)
1204 /// Return an unsafe pointer to the string's buffer.
1206 /// The caller must ensure that the string outlives this pointer,
1207 /// and that it is not reallocated (e.g. by pushing to the
1211 fn as_ptr(&self) -> *const u8 {
1212 core_str::StrExt::as_ptr(&self[])
1215 /// Return an iterator of `u16` over the string encoded as UTF-16.
1216 #[unstable = "this functionality may only be provided by libunicode"]
1217 fn utf16_units(&self) -> Utf16Units {
1218 Utf16Units { encoder: Utf16Encoder::new(self[].chars()) }
1221 /// Return the number of bytes in this string
1226 /// assert_eq!("foo".len(), 3);
1227 /// assert_eq!("ƒoo".len(), 4);
1231 fn len(&self) -> uint {
1232 core_str::StrExt::len(&self[])
1235 /// Returns true if this slice contains no bytes
1240 /// assert!("".is_empty());
1244 fn is_empty(&self) -> bool {
1245 core_str::StrExt::is_empty(&self[])
1248 /// Parse this string into the specified type.
1253 /// assert_eq!("4".parse::<u32>(), Some(4));
1254 /// assert_eq!("j".parse::<u32>(), None);
1257 #[unstable = "this method was just created"]
1258 fn parse<F: FromStr>(&self) -> Option<F> {
1259 core_str::StrExt::parse(&self[])
1262 /// Returns an iterator over the
1263 /// [grapheme clusters](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
1266 /// If `is_extended` is true, the iterator is over the *extended grapheme clusters*;
1267 /// otherwise, the iterator is over the *legacy grapheme clusters*.
1268 /// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
1269 /// recommends extended grapheme cluster boundaries for general processing.
1274 /// let gr1 = "a\u{310}e\u{301}o\u{308}\u{332}".graphemes(true).collect::<Vec<&str>>();
1275 /// let b: &[_] = &["a\u{310}", "e\u{301}", "o\u{308}\u{332}"];
1276 /// assert_eq!(gr1.as_slice(), b);
1277 /// let gr2 = "a\r\nb🇷🇺🇸🇹".graphemes(true).collect::<Vec<&str>>();
1278 /// let b: &[_] = &["a", "\r\n", "b", "🇷🇺🇸🇹"];
1279 /// assert_eq!(gr2.as_slice(), b);
1281 #[unstable = "this functionality may only be provided by libunicode"]
1282 fn graphemes(&self, is_extended: bool) -> Graphemes {
1283 UnicodeStr::graphemes(&self[], is_extended)
1286 /// Returns an iterator over the grapheme clusters of self and their byte offsets.
1287 /// See `graphemes()` method for more information.
1292 /// let gr_inds = "a̐éö̲\r\n".grapheme_indices(true).collect::<Vec<(uint, &str)>>();
1293 /// let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
1294 /// assert_eq!(gr_inds.as_slice(), b);
1296 #[unstable = "this functionality may only be provided by libunicode"]
1297 fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices {
1298 UnicodeStr::grapheme_indices(&self[], is_extended)
1301 /// An iterator over the words of a string (subsequences separated
1302 /// by any sequence of whitespace). Sequences of whitespace are
1303 /// collapsed, so empty "words" are not included.
1308 /// let some_words = " Mary had\ta little \n\t lamb";
1309 /// let v: Vec<&str> = some_words.words().collect();
1310 /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
1313 fn words(&self) -> Words {
1314 UnicodeStr::words(&self[])
1317 /// Returns a string's displayed width in columns, treating control
1318 /// characters as zero-width.
1320 /// `is_cjk` determines behavior for characters in the Ambiguous category:
1321 /// if `is_cjk` is `true`, these are 2 columns wide; otherwise, they are 1.
1322 /// In CJK locales, `is_cjk` should be `true`, else it should be `false`.
1323 /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
1324 /// recommends that these characters be treated as 1 column (i.e.,
1325 /// `is_cjk` = `false`) if the locale is unknown.
1326 #[unstable = "this functionality may only be provided by libunicode"]
1327 fn width(&self, is_cjk: bool) -> uint {
1328 UnicodeStr::width(&self[], is_cjk)
1331 /// Returns a string with leading and trailing whitespace removed.
1333 fn trim(&self) -> &str {
1334 UnicodeStr::trim(&self[])
1337 /// Returns a string with leading whitespace removed.
1339 fn trim_left(&self) -> &str {
1340 UnicodeStr::trim_left(&self[])
1343 /// Returns a string with trailing whitespace removed.
1345 fn trim_right(&self) -> &str {
1346 UnicodeStr::trim_right(&self[])
1351 impl StrExt for str {}
1357 use core::iter::AdditiveIterator;
1358 use super::from_utf8;
1359 use super::Utf8Error;
1364 assert!("" <= "foo");
1365 assert!("foo" <= "foo");
1366 assert!("foo" != "bar");
1371 assert_eq!("".len(), 0u);
1372 assert_eq!("hello world".len(), 11u);
1373 assert_eq!("\x63".len(), 1u);
1374 assert_eq!("\u{a2}".len(), 2u);
1375 assert_eq!("\u{3c0}".len(), 2u);
1376 assert_eq!("\u{2620}".len(), 3u);
1377 assert_eq!("\u{1d11e}".len(), 4u);
1379 assert_eq!("".chars().count(), 0u);
1380 assert_eq!("hello world".chars().count(), 11u);
1381 assert_eq!("\x63".chars().count(), 1u);
1382 assert_eq!("\u{a2}".chars().count(), 1u);
1383 assert_eq!("\u{3c0}".chars().count(), 1u);
1384 assert_eq!("\u{2620}".chars().count(), 1u);
1385 assert_eq!("\u{1d11e}".chars().count(), 1u);
1386 assert_eq!("ประเทศไทย中华Việt Nam".chars().count(), 19u);
1388 assert_eq!("hello".width(false), 10u);
1389 assert_eq!("hello".width(true), 10u);
1390 assert_eq!("\0\0\0\0\0".width(false), 0u);
1391 assert_eq!("\0\0\0\0\0".width(true), 0u);
1392 assert_eq!("".width(false), 0u);
1393 assert_eq!("".width(true), 0u);
1394 assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(false), 4u);
1395 assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(true), 8u);
1400 assert_eq!("hello".find('l'), Some(2u));
1401 assert_eq!("hello".find(|&: c:char| c == 'o'), Some(4u));
1402 assert!("hello".find('x').is_none());
1403 assert!("hello".find(|&: c:char| c == 'x').is_none());
1404 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
1405 assert_eq!("ประเทศไทย中华Việt Nam".find(|&: c: char| c == '华'), Some(30u));
1410 assert_eq!("hello".rfind('l'), Some(3u));
1411 assert_eq!("hello".rfind(|&: c:char| c == 'o'), Some(4u));
1412 assert!("hello".rfind('x').is_none());
1413 assert!("hello".rfind(|&: c:char| c == 'x').is_none());
1414 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
1415 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|&: c: char| c == '华'), Some(30u));
1420 let empty = String::from_str("");
1421 let s: String = empty.chars().collect();
1422 assert_eq!(empty, s);
1423 let data = String::from_str("ประเทศไทย中");
1424 let s: String = data.chars().collect();
1425 assert_eq!(data, s);
1429 fn test_into_bytes() {
1430 let data = String::from_str("asdf");
1431 let buf = data.into_bytes();
1432 assert_eq!(b"asdf", buf);
1436 fn test_find_str() {
1438 assert_eq!("".find_str(""), Some(0u));
1439 assert!("banana".find_str("apple pie").is_none());
1441 let data = "abcabc";
1442 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
1443 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
1444 assert!(data.slice(2u, 4u).find_str("ab").is_none());
1446 let string = "ประเทศไทย中华Việt Nam";
1447 let mut data = String::from_str(string);
1448 data.push_str(string);
1449 assert!(data.find_str("ไท华").is_none());
1450 assert_eq!(data.slice(0u, 43u).find_str(""), Some(0u));
1451 assert_eq!(data.slice(6u, 43u).find_str(""), Some(6u - 6u));
1453 assert_eq!(data.slice(0u, 43u).find_str("ประ"), Some( 0u));
1454 assert_eq!(data.slice(0u, 43u).find_str("ทศไ"), Some(12u));
1455 assert_eq!(data.slice(0u, 43u).find_str("ย中"), Some(24u));
1456 assert_eq!(data.slice(0u, 43u).find_str("iệt"), Some(34u));
1457 assert_eq!(data.slice(0u, 43u).find_str("Nam"), Some(40u));
1459 assert_eq!(data.slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
1460 assert_eq!(data.slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
1461 assert_eq!(data.slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
1462 assert_eq!(data.slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
1463 assert_eq!(data.slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
1467 fn test_slice_chars() {
1468 fn t(a: &str, b: &str, start: uint) {
1469 assert_eq!(a.slice_chars(start, start + b.chars().count()), b);
1472 t("hello", "llo", 2);
1473 t("hello", "el", 1);
1476 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
1479 fn s(x: &str) -> String { x.to_string() }
1481 macro_rules! test_concat {
1482 ($expected: expr, $string: expr) => {
1484 let s: String = $string.concat();
1485 assert_eq!($expected, s);
1491 fn test_concat_for_different_types() {
1492 test_concat!("ab", vec![s("a"), s("b")]);
1493 test_concat!("ab", vec!["a", "b"]);
1494 test_concat!("ab", vec!["a", "b"].as_slice());
1495 test_concat!("ab", vec![s("a"), s("b")]);
1499 fn test_concat_for_different_lengths() {
1500 let empty: &[&str] = &[];
1501 test_concat!("", empty);
1502 test_concat!("a", ["a"]);
1503 test_concat!("ab", ["a", "b"]);
1504 test_concat!("abc", ["", "a", "bc"]);
1507 macro_rules! test_connect {
1508 ($expected: expr, $string: expr, $delim: expr) => {
1510 let s = $string.connect($delim);
1511 assert_eq!($expected, s);
1517 fn test_connect_for_different_types() {
1518 test_connect!("a-b", ["a", "b"], "-");
1519 let hyphen = "-".to_string();
1520 test_connect!("a-b", [s("a"), s("b")], hyphen.as_slice());
1521 test_connect!("a-b", vec!["a", "b"], hyphen.as_slice());
1522 test_connect!("a-b", vec!["a", "b"].as_slice(), "-");
1523 test_connect!("a-b", vec![s("a"), s("b")], "-");
1527 fn test_connect_for_different_lengths() {
1528 let empty: &[&str] = &[];
1529 test_connect!("", empty, "-");
1530 test_connect!("a", ["a"], "-");
1531 test_connect!("a-b", ["a", "b"], "-");
1532 test_connect!("-a-bc", ["", "a", "bc"], "-");
1536 fn test_unsafe_slice() {
1537 assert_eq!("ab", unsafe {"abc".slice_unchecked(0, 2)});
1538 assert_eq!("bc", unsafe {"abc".slice_unchecked(1, 3)});
1539 assert_eq!("", unsafe {"abc".slice_unchecked(1, 1)});
1540 fn a_million_letter_a() -> String {
1542 let mut rs = String::new();
1544 rs.push_str("aaaaaaaaaa");
1549 fn half_a_million_letter_a() -> String {
1551 let mut rs = String::new();
1553 rs.push_str("aaaaa");
1558 let letters = a_million_letter_a();
1559 assert!(half_a_million_letter_a() ==
1560 unsafe {String::from_str(letters.slice_unchecked(
/// Checks `starts_with` against empty, ASCII and multi-byte prefixes.
fn test_starts_with() {
    // (haystack, candidate prefix, expected answer)
    let cases = [
        ("", "", true),
        ("abc", "", true),
        ("abc", "a", true),
        ("a", "abc", false),
        ("", "abc", false),
        ("ödd", "-", false),
        ("ödd", "öd", true),
    ];
    for &(text, prefix, expected) in cases.iter() {
        assert_eq!(text.starts_with(prefix), expected);
    }
}
/// Checks `ends_with` against empty, ASCII and multi-byte suffixes.
fn test_ends_with() {
    // (haystack, candidate suffix, expected answer)
    let cases = [
        ("", "", true),
        ("abc", "", true),
        ("abc", "c", true),
        ("a", "abc", false),
        ("", "abc", false),
        ("ddö", "-", false),
        ("ddö", "dö", true),
    ];
    for &(text, suffix, expected) in cases.iter() {
        assert_eq!(text.ends_with(suffix), expected);
    }
}
/// Checks that only the zero-length string reports itself as empty.
fn test_is_empty() {
    assert_eq!("".is_empty(), true);
    assert_eq!("a".is_empty(), false);
}
1596 assert_eq!("".replace(a, "b"), String::from_str(""));
1597 assert_eq!("a".replace(a, "b"), String::from_str("b"));
1598 assert_eq!("ab".replace(a, "b"), String::from_str("bb"));
1600 assert!(" test test ".replace(test, "toast") ==
1601 String::from_str(" toast toast "));
1602 assert_eq!(" test test ".replace(test, ""), String::from_str(" "));
1606 fn test_replace_2a() {
1607 let data = "ประเทศไทย中华";
1608 let repl = "دولة الكويت";
1611 let a2 = "دولة الكويتทศไทย中华";
1612 assert_eq!(data.replace(a, repl), a2);
1616 fn test_replace_2b() {
1617 let data = "ประเทศไทย中华";
1618 let repl = "دولة الكويت";
1621 let b2 = "ปรدولة الكويتทศไทย中华";
1622 assert_eq!(data.replace(b, repl), b2);
1626 fn test_replace_2c() {
1627 let data = "ประเทศไทย中华";
1628 let repl = "دولة الكويت";
1631 let c2 = "ประเทศไทยدولة الكويت";
1632 assert_eq!(data.replace(c, repl), c2);
1636 fn test_replace_2d() {
1637 let data = "ประเทศไทย中华";
1638 let repl = "دولة الكويت";
1641 assert_eq!(data.replace(d, repl), data);
1646 assert_eq!("ab", "abc".slice(0, 2));
1647 assert_eq!("bc", "abc".slice(1, 3));
1648 assert_eq!("", "abc".slice(1, 1));
1649 assert_eq!("\u{65e5}", "\u{65e5}\u{672c}".slice(0, 3));
1651 let data = "ประเทศไทย中华";
1652 assert_eq!("ป", data.slice(0, 3));
1653 assert_eq!("ร", data.slice(3, 6));
1654 assert_eq!("", data.slice(3, 3));
1655 assert_eq!("华", data.slice(30, 33));
1657 fn a_million_letter_x() -> String {
1659 let mut rs = String::new();
1661 rs.push_str("华华华华华华华华华华");
1666 fn half_a_million_letter_x() -> String {
1668 let mut rs = String::new();
1670 rs.push_str("华华华华华");
1675 let letters = a_million_letter_x();
1676 assert!(half_a_million_letter_x() ==
1677 String::from_str(letters.slice(0u, 3u * 500000u)));
1682 let ss = "中华Việt Nam";
1684 assert_eq!("华", ss.slice(3u, 6u));
1685 assert_eq!("Việt Nam", ss.slice(6u, 16u));
1687 assert_eq!("ab", "abc".slice(0u, 2u));
1688 assert_eq!("bc", "abc".slice(1u, 3u));
1689 assert_eq!("", "abc".slice(1u, 1u));
1691 assert_eq!("中", ss.slice(0u, 3u));
1692 assert_eq!("华V", ss.slice(3u, 7u));
1693 assert_eq!("", ss.slice(3u, 3u));
1708 fn test_slice_fail() {
1709 "中华Việt Nam".slice(0u, 2u);
1713 fn test_slice_from() {
1714 assert_eq!("abcd".slice_from(0), "abcd");
1715 assert_eq!("abcd".slice_from(2), "cd");
1716 assert_eq!("abcd".slice_from(4), "");
1719 fn test_slice_to() {
1720 assert_eq!("abcd".slice_to(0), "");
1721 assert_eq!("abcd".slice_to(2), "ab");
1722 assert_eq!("abcd".slice_to(4), "abcd");
1726 fn test_trim_left_matches() {
1727 let v: &[char] = &[];
1728 assert_eq!(" *** foo *** ".trim_left_matches(v), " *** foo *** ");
1729 let chars: &[char] = &['*', ' '];
1730 assert_eq!(" *** foo *** ".trim_left_matches(chars), "foo *** ");
1731 assert_eq!(" *** *** ".trim_left_matches(chars), "");
1732 assert_eq!("foo *** ".trim_left_matches(chars), "foo *** ");
1734 assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
1735 let chars: &[char] = &['1', '2'];
1736 assert_eq!("12foo1bar12".trim_left_matches(chars), "foo1bar12");
1737 assert_eq!("123foo1bar123".trim_left_matches(|&: c: char| c.is_numeric()), "foo1bar123");
1741 fn test_trim_right_matches() {
1742 let v: &[char] = &[];
1743 assert_eq!(" *** foo *** ".trim_right_matches(v), " *** foo *** ");
1744 let chars: &[char] = &['*', ' '];
1745 assert_eq!(" *** foo *** ".trim_right_matches(chars), " *** foo");
1746 assert_eq!(" *** *** ".trim_right_matches(chars), "");
1747 assert_eq!(" *** foo".trim_right_matches(chars), " *** foo");
1749 assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
1750 let chars: &[char] = &['1', '2'];
1751 assert_eq!("12foo1bar12".trim_right_matches(chars), "12foo1bar");
1752 assert_eq!("123foo1bar123".trim_right_matches(|&: c: char| c.is_numeric()), "123foo1bar");
1756 fn test_trim_matches() {
1757 let v: &[char] = &[];
1758 assert_eq!(" *** foo *** ".trim_matches(v), " *** foo *** ");
1759 let chars: &[char] = &['*', ' '];
1760 assert_eq!(" *** foo *** ".trim_matches(chars), "foo");
1761 assert_eq!(" *** *** ".trim_matches(chars), "");
1762 assert_eq!("foo".trim_matches(chars), "foo");
1764 assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
1765 let chars: &[char] = &['1', '2'];
1766 assert_eq!("12foo1bar12".trim_matches(chars), "foo1bar");
1767 assert_eq!("123foo1bar123".trim_matches(|&: c: char| c.is_numeric()), "foo1bar");
/// Checks that `trim_left` strips leading whitespace (including U+3000)
/// and leaves trailing whitespace untouched.
fn test_trim_left() {
    // (input, expected result of trimming on the left)
    let cases = [
        ("", ""),
        ("a", "a"),
        (" ", ""),
        (" blah", "blah"),
        (" \u{3000} wut", "wut"),
        ("hey ", "hey "),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(input.trim_left(), expected);
    }
}
/// Checks that `trim_right` strips trailing whitespace (including U+3000)
/// and leaves leading whitespace untouched.
fn test_trim_right() {
    // (input, expected result of trimming on the right)
    let cases = [
        ("", ""),
        ("a", "a"),
        (" ", ""),
        ("blah ", "blah"),
        ("wut \u{3000} ", "wut"),
        (" hey", " hey"),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(input.trim_right(), expected);
    }
}
1792 assert_eq!("".trim(), "");
1793 assert_eq!("a".trim(), "a");
1794 assert_eq!(" ".trim(), "");
1795 assert_eq!(" blah ".trim(), "blah");
1796 assert_eq!("\nwut \u{3000} ".trim(), "wut");
1797 assert_eq!(" hey dude ".trim(), "hey dude");
/// Checks `char::is_whitespace` over whole strings: every character of the
/// whitespace-only inputs must qualify, and a single `_` must break the run.
fn test_is_whitespace() {
    // True when every character of `text` is whitespace (vacuously true for "").
    fn only_whitespace(text: &str) -> bool {
        text.chars().all(|c| c.is_whitespace())
    }

    assert!(only_whitespace(""));
    assert!(only_whitespace(" "));
    assert!(only_whitespace("\u{2009}")); // Thin space
    assert!(only_whitespace(" \n\t "));
    assert!(!only_whitespace(" _ "));
}
1810 fn test_slice_shift_char() {
1811 let data = "ประเทศไทย中";
1812 assert_eq!(data.slice_shift_char(), Some(('ป', "ระเทศไทย中")));
1816 fn test_slice_shift_char_2() {
1818 assert_eq!(empty.slice_shift_char(), None);
1823 // deny overlong encodings
1824 assert!(from_utf8(&[0xc0, 0x80]).is_err());
1825 assert!(from_utf8(&[0xc0, 0xae]).is_err());
1826 assert!(from_utf8(&[0xe0, 0x80, 0x80]).is_err());
1827 assert!(from_utf8(&[0xe0, 0x80, 0xaf]).is_err());
1828 assert!(from_utf8(&[0xe0, 0x81, 0x81]).is_err());
1829 assert!(from_utf8(&[0xf0, 0x82, 0x82, 0xac]).is_err());
1830 assert!(from_utf8(&[0xf4, 0x90, 0x80, 0x80]).is_err());
1833 assert!(from_utf8(&[0xED, 0xA0, 0x80]).is_err());
1834 assert!(from_utf8(&[0xED, 0xBF, 0xBF]).is_err());
1836 assert!(from_utf8(&[0xC2, 0x80]).is_ok());
1837 assert!(from_utf8(&[0xDF, 0xBF]).is_ok());
1838 assert!(from_utf8(&[0xE0, 0xA0, 0x80]).is_ok());
1839 assert!(from_utf8(&[0xED, 0x9F, 0xBF]).is_ok());
1840 assert!(from_utf8(&[0xEE, 0x80, 0x80]).is_ok());
1841 assert!(from_utf8(&[0xEF, 0xBF, 0xBF]).is_ok());
1842 assert!(from_utf8(&[0xF0, 0x90, 0x80, 0x80]).is_ok());
1843 assert!(from_utf8(&[0xF4, 0x8F, 0xBF, 0xBF]).is_ok());
1847 fn test_is_utf16() {
1848 use unicode::str::is_utf16;
1850 ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } }
1859 // surrogate pairs (randomly generated with Python 3's
1860 // .encode('utf-16be'))
1861 pos!(&[0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
1862 &[0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
1863 &[0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);
1865 // mixtures (also random)
1866 pos!(&[0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
1867 &[0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
1868 &[0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);
1872 ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } }
1876 // surrogate + regular unit
1878 // surrogate + lead surrogate
1880 // unterminated surrogate
1882 // trail surrogate without a lead
1885 // random byte sequences that Python 3's .decode('utf-16be')
1887 neg!(&[0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
1888 &[0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
1889 &[0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
1890 &[0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
1891 &[0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
1892 &[0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
1893 &[0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
1894 &[0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
1895 &[0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
1896 &[0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
1897 &[0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
1898 &[0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
1899 &[0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
1900 &[0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
1901 &[0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
1902 &[0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
1903 &[0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
1904 &[0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
1905 &[0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
1906 &[0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
1907 &[0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
1911 fn test_as_bytes() {
1914 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1915 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1919 assert_eq!("".as_bytes(), b);
1920 assert_eq!("abc".as_bytes(), b"abc");
1921 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
1926 fn test_as_bytes_fail() {
1927 // Don't double free. (I'm not sure if this exercises the
1928 // original problem code path anymore.)
1929 let s = String::from_str("");
1930 let _bytes = s.as_bytes();
1936 let buf = "hello".as_ptr();
1938 assert_eq!(*buf.offset(0), b'h');
1939 assert_eq!(*buf.offset(1), b'e');
1940 assert_eq!(*buf.offset(2), b'l');
1941 assert_eq!(*buf.offset(3), b'l');
1942 assert_eq!(*buf.offset(4), b'o');
1947 fn test_subslice_offset() {
1948 let a = "kernelsprite";
1949 let b = a.slice(7, a.len());
1950 let c = a.slice(0, a.len() - 6);
1951 assert_eq!(a.subslice_offset(b), 7);
1952 assert_eq!(a.subslice_offset(c), 0);
1954 let string = "a\nb\nc";
1955 let lines: Vec<&str> = string.lines().collect();
1956 assert_eq!(string.subslice_offset(lines[0]), 0);
1957 assert_eq!(string.subslice_offset(lines[1]), 2);
1958 assert_eq!(string.subslice_offset(lines[2]), 4);
1963 fn test_subslice_offset_2() {
1964 let a = "alchemiter";
1965 let b = "cruxtruder";
1966 a.subslice_offset(b);
1970 fn vec_str_conversions() {
1971 let s1: String = String::from_str("All mimsy were the borogoves");
1973 let v: Vec<u8> = s1.as_bytes().to_vec();
1974 let s2: String = String::from_str(from_utf8(v.as_slice()).unwrap());
1975 let mut i: uint = 0u;
1976 let n1: uint = s1.len();
1977 let n2: uint = v.len();
1980 let a: u8 = s1.as_bytes()[i];
1981 let b: u8 = s2.as_bytes()[i];
/// Checks substring search with `contains` on ASCII input, on the empty
/// string, and on multi-byte (Thai / Chinese / Vietnamese) text.
fn test_contains() {
    // ASCII and empty-string cases that must match.
    let hits = [
        ("abcde", "bcd"),
        ("abcde", "abcd"),
        ("abcde", "bcde"),
        ("abcde", ""),
        ("", ""),
    ];
    for &(haystack, needle) in hits.iter() {
        assert!(haystack.contains(needle));
    }

    // ASCII and empty-string cases that must not match.
    let misses = [("abcde", "def"), ("", "a")];
    for &(haystack, needle) in misses.iter() {
        assert!(!haystack.contains(needle));
    }

    // Multi-byte text: needles that occur contiguously are found; one whose
    // characters all occur but are not adjacent is rejected.
    let data = "ประเทศไทย中华Việt Nam";
    let present = ["ประเ", "ะเ", "中华"];
    for needle in present.iter() {
        assert!(data.contains(*needle));
    }
    assert!(!data.contains("ไท华"));
}
2007 fn test_contains_char() {
2008 assert!("abc".contains_char('b'));
2009 assert!("a".contains_char('a'));
2010 assert!(!"abc".contains_char('d'));
2011 assert!(!"".contains_char('a'));
2016 let s = "ศไทย中华Việt Nam";
2017 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2019 for ch in v.iter() {
2020 assert!(s.char_at(pos) == *ch);
2021 pos += ch.to_string().len();
2026 fn test_char_at_reverse() {
2027 let s = "ศไทย中华Việt Nam";
2028 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2029 let mut pos = s.len();
2030 for ch in v.iter().rev() {
2031 assert!(s.char_at_reverse(pos) == *ch);
2032 pos -= ch.to_string().len();
2037 fn test_escape_unicode() {
2038 assert_eq!("abc".escape_unicode(),
2039 String::from_str("\\u{61}\\u{62}\\u{63}"));
2040 assert_eq!("a c".escape_unicode(),
2041 String::from_str("\\u{61}\\u{20}\\u{63}"));
2042 assert_eq!("\r\n\t".escape_unicode(),
2043 String::from_str("\\u{d}\\u{a}\\u{9}"));
2044 assert_eq!("'\"\\".escape_unicode(),
2045 String::from_str("\\u{27}\\u{22}\\u{5c}"));
2046 assert_eq!("\x00\x01\u{fe}\u{ff}".escape_unicode(),
2047 String::from_str("\\u{0}\\u{1}\\u{fe}\\u{ff}"));
2048 assert_eq!("\u{100}\u{ffff}".escape_unicode(),
2049 String::from_str("\\u{100}\\u{ffff}"));
2050 assert_eq!("\u{10000}\u{10ffff}".escape_unicode(),
2051 String::from_str("\\u{10000}\\u{10ffff}"));
2052 assert_eq!("ab\u{fb00}".escape_unicode(),
2053 String::from_str("\\u{61}\\u{62}\\u{fb00}"));
2054 assert_eq!("\u{1d4ea}\r".escape_unicode(),
2055 String::from_str("\\u{1d4ea}\\u{d}"));
2059 fn test_escape_default() {
2060 assert_eq!("abc".escape_default(), String::from_str("abc"));
2061 assert_eq!("a c".escape_default(), String::from_str("a c"));
2062 assert_eq!("\r\n\t".escape_default(), String::from_str("\\r\\n\\t"));
2063 assert_eq!("'\"\\".escape_default(), String::from_str("\\'\\\"\\\\"));
2064 assert_eq!("\u{100}\u{ffff}".escape_default(),
2065 String::from_str("\\u{100}\\u{ffff}"));
2066 assert_eq!("\u{10000}\u{10ffff}".escape_default(),
2067 String::from_str("\\u{10000}\\u{10ffff}"));
2068 assert_eq!("ab\u{fb00}".escape_default(),
2069 String::from_str("ab\\u{fb00}"));
2070 assert_eq!("\u{1d4ea}\r".escape_default(),
2071 String::from_str("\\u{1d4ea}\\r"));
/// Checks the total ordering of string slices via `Ord::cmp`: comparison is
/// lexicographic, so a strict prefix sorts before the longer string and the
/// first differing character decides otherwise.
fn test_total_ord() {
    // These comparisons used to be bare `… == Greater;` expression statements
    // whose boolean result was thrown away, so the test could never fail.
    // Assert on each result instead.
    assert_eq!("1234".cmp("123"), Greater);
    assert_eq!("123".cmp("1234"), Less);
    assert_eq!("1234".cmp("1234"), Equal);
    assert_eq!("12345555".cmp("123456"), Less);
    assert_eq!("22".cmp("1234"), Greater);
}
2084 fn test_char_range_at() {
2085 let data = "b¢€𤭢𤭢€¢b";
2086 assert_eq!('b', data.char_range_at(0).ch);
2087 assert_eq!('¢', data.char_range_at(1).ch);
2088 assert_eq!('€', data.char_range_at(3).ch);
2089 assert_eq!('𤭢', data.char_range_at(6).ch);
2090 assert_eq!('𤭢', data.char_range_at(10).ch);
2091 assert_eq!('€', data.char_range_at(14).ch);
2092 assert_eq!('¢', data.char_range_at(17).ch);
2093 assert_eq!('b', data.char_range_at(19).ch);
2097 fn test_char_range_at_reverse_underflow() {
2098 assert_eq!("abc".char_range_at_reverse(0).next, 0);
2102 fn test_iterator() {
2103 let s = "ศไทย中华Việt Nam";
2104 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2107 let mut it = s.chars();
2110 assert_eq!(c, v[pos]);
2113 assert_eq!(pos, v.len());
2117 fn test_rev_iterator() {
2118 let s = "ศไทย中华Việt Nam";
2119 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2122 let mut it = s.chars().rev();
2125 assert_eq!(c, v[pos]);
2128 assert_eq!(pos, v.len());
2132 fn test_chars_decoding() {
2133 let mut bytes = [0u8; 4];
2134 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2135 let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2136 let s = ::core::str::from_utf8(&bytes[..len]).unwrap();
2137 if Some(c) != s.chars().next() {
2138 panic!("character {:x}={} does not decode correctly", c as u32, c);
2144 fn test_chars_rev_decoding() {
2145 let mut bytes = [0u8; 4];
2146 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2147 let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2148 let s = ::core::str::from_utf8(&bytes[..len]).unwrap();
2149 if Some(c) != s.chars().rev().next() {
2150 panic!("character {:x}={} does not decode correctly", c as u32, c);
2156 fn test_iterator_clone() {
2157 let s = "ศไทย中华Việt Nam";
2158 let mut it = s.chars();
2160 assert!(it.zip(it.clone()).all(|(x,y)| x == y));
2164 fn test_bytesator() {
2165 let s = "ศไทย中华Việt Nam";
2167 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2168 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2173 for b in s.bytes() {
2174 assert_eq!(b, v[pos]);
2180 fn test_bytes_revator() {
2181 let s = "ศไทย中华Việt Nam";
2183 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2184 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2187 let mut pos = v.len();
2189 for b in s.bytes().rev() {
2191 assert_eq!(b, v[pos]);
2196 fn test_char_indicesator() {
2197 let s = "ศไทย中华Việt Nam";
2198 let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
2199 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2202 let mut it = s.char_indices();
2205 assert_eq!(c, (p[pos], v[pos]));
2208 assert_eq!(pos, v.len());
2209 assert_eq!(pos, p.len());
2213 fn test_char_indices_revator() {
2214 let s = "ศไทย中华Việt Nam";
2215 let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
2216 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2219 let mut it = s.char_indices().rev();
2222 assert_eq!(c, (p[pos], v[pos]));
2225 assert_eq!(pos, v.len());
2226 assert_eq!(pos, p.len());
2230 fn test_splitn_char_iterator() {
2231 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2233 let split: Vec<&str> = data.splitn(3, ' ').collect();
2234 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2236 let split: Vec<&str> = data.splitn(3, |&: c: char| c == ' ').collect();
2237 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2240 let split: Vec<&str> = data.splitn(3, 'ä').collect();
2241 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
2243 let split: Vec<&str> = data.splitn(3, |&: c: char| c == 'ä').collect();
2244 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
2248 fn test_split_char_iterator_no_trailing() {
2249 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2251 let split: Vec<&str> = data.split('\n').collect();
2252 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
2254 let split: Vec<&str> = data.split_terminator('\n').collect();
2255 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
2260 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
2261 let words: Vec<&str> = data.words().collect();
2262 assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
2266 fn test_nfd_chars() {
2268 ($input: expr, $expected: expr) => {
2269 assert_eq!($input.nfd_chars().collect::<String>(), $expected);
2273 t!("\u{1e0b}\u{1c4}", "d\u{307}\u{1c4}");
2274 t!("\u{2026}", "\u{2026}");
2275 t!("\u{2126}", "\u{3a9}");
2276 t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
2277 t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
2278 t!("a\u{301}", "a\u{301}");
2279 t!("\u{301}a", "\u{301}a");
2280 t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
2281 t!("\u{ac1c}", "\u{1100}\u{1162}");
2285 fn test_nfkd_chars() {
2287 ($input: expr, $expected: expr) => {
2288 assert_eq!($input.nfkd_chars().collect::<String>(), $expected);
2292 t!("\u{1e0b}\u{1c4}", "d\u{307}DZ\u{30c}");
2293 t!("\u{2026}", "...");
2294 t!("\u{2126}", "\u{3a9}");
2295 t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
2296 t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
2297 t!("a\u{301}", "a\u{301}");
2298 t!("\u{301}a", "\u{301}a");
2299 t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
2300 t!("\u{ac1c}", "\u{1100}\u{1162}");
2304 fn test_nfc_chars() {
2306 ($input: expr, $expected: expr) => {
2307 assert_eq!($input.nfc_chars().collect::<String>(), $expected);
2311 t!("\u{1e0b}\u{1c4}", "\u{1e0b}\u{1c4}");
2312 t!("\u{2026}", "\u{2026}");
2313 t!("\u{2126}", "\u{3a9}");
2314 t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
2315 t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
2316 t!("a\u{301}", "\u{e1}");
2317 t!("\u{301}a", "\u{301}a");
2318 t!("\u{d4db}", "\u{d4db}");
2319 t!("\u{ac1c}", "\u{ac1c}");
2320 t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
2324 fn test_nfkc_chars() {
2326 ($input: expr, $expected: expr) => {
2327 assert_eq!($input.nfkc_chars().collect::<String>(), $expected);
2331 t!("\u{1e0b}\u{1c4}", "\u{1e0b}D\u{17d}");
2332 t!("\u{2026}", "...");
2333 t!("\u{2126}", "\u{3a9}");
2334 t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
2335 t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
2336 t!("a\u{301}", "\u{e1}");
2337 t!("\u{301}a", "\u{301}a");
2338 t!("\u{d4db}", "\u{d4db}");
2339 t!("\u{ac1c}", "\u{ac1c}");
2340 t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
2345 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
2346 let lines: Vec<&str> = data.lines().collect();
2347 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2349 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
2350 let lines: Vec<&str> = data.lines().collect();
2351 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2355 fn test_graphemes() {
2356 use core::iter::order;
2357 // official Unicode test data
2358 // from http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
2359 let test_same: [(_, &[_]); 325] = [
2360 ("\u{20}\u{20}", &["\u{20}", "\u{20}"]),
2361 ("\u{20}\u{308}\u{20}", &["\u{20}\u{308}", "\u{20}"]),
2362 ("\u{20}\u{D}", &["\u{20}", "\u{D}"]),
2363 ("\u{20}\u{308}\u{D}", &["\u{20}\u{308}", "\u{D}"]),
2364 ("\u{20}\u{A}", &["\u{20}", "\u{A}"]),
2365 ("\u{20}\u{308}\u{A}", &["\u{20}\u{308}", "\u{A}"]),
2366 ("\u{20}\u{1}", &["\u{20}", "\u{1}"]),
2367 ("\u{20}\u{308}\u{1}", &["\u{20}\u{308}", "\u{1}"]),
2368 ("\u{20}\u{300}", &["\u{20}\u{300}"]),
2369 ("\u{20}\u{308}\u{300}", &["\u{20}\u{308}\u{300}"]),
2370 ("\u{20}\u{1100}", &["\u{20}", "\u{1100}"]),
2371 ("\u{20}\u{308}\u{1100}", &["\u{20}\u{308}", "\u{1100}"]),
2372 ("\u{20}\u{1160}", &["\u{20}", "\u{1160}"]),
2373 ("\u{20}\u{308}\u{1160}", &["\u{20}\u{308}", "\u{1160}"]),
2374 ("\u{20}\u{11A8}", &["\u{20}", "\u{11A8}"]),
2375 ("\u{20}\u{308}\u{11A8}", &["\u{20}\u{308}", "\u{11A8}"]),
2376 ("\u{20}\u{AC00}", &["\u{20}", "\u{AC00}"]),
2377 ("\u{20}\u{308}\u{AC00}", &["\u{20}\u{308}", "\u{AC00}"]),
2378 ("\u{20}\u{AC01}", &["\u{20}", "\u{AC01}"]),
2379 ("\u{20}\u{308}\u{AC01}", &["\u{20}\u{308}", "\u{AC01}"]),
2380 ("\u{20}\u{1F1E6}", &["\u{20}", "\u{1F1E6}"]),
2381 ("\u{20}\u{308}\u{1F1E6}", &["\u{20}\u{308}", "\u{1F1E6}"]),
2382 ("\u{20}\u{378}", &["\u{20}", "\u{378}"]),
2383 ("\u{20}\u{308}\u{378}", &["\u{20}\u{308}", "\u{378}"]),
2384 ("\u{D}\u{20}", &["\u{D}", "\u{20}"]),
2385 ("\u{D}\u{308}\u{20}", &["\u{D}", "\u{308}", "\u{20}"]),
2386 ("\u{D}\u{D}", &["\u{D}", "\u{D}"]),
2387 ("\u{D}\u{308}\u{D}", &["\u{D}", "\u{308}", "\u{D}"]),
2388 ("\u{D}\u{A}", &["\u{D}\u{A}"]),
2389 ("\u{D}\u{308}\u{A}", &["\u{D}", "\u{308}", "\u{A}"]),
2390 ("\u{D}\u{1}", &["\u{D}", "\u{1}"]),
2391 ("\u{D}\u{308}\u{1}", &["\u{D}", "\u{308}", "\u{1}"]),
2392 ("\u{D}\u{300}", &["\u{D}", "\u{300}"]),
2393 ("\u{D}\u{308}\u{300}", &["\u{D}", "\u{308}\u{300}"]),
2394 ("\u{D}\u{903}", &["\u{D}", "\u{903}"]),
2395 ("\u{D}\u{1100}", &["\u{D}", "\u{1100}"]),
2396 ("\u{D}\u{308}\u{1100}", &["\u{D}", "\u{308}", "\u{1100}"]),
2397 ("\u{D}\u{1160}", &["\u{D}", "\u{1160}"]),
2398 ("\u{D}\u{308}\u{1160}", &["\u{D}", "\u{308}", "\u{1160}"]),
2399 ("\u{D}\u{11A8}", &["\u{D}", "\u{11A8}"]),
2400 ("\u{D}\u{308}\u{11A8}", &["\u{D}", "\u{308}", "\u{11A8}"]),
2401 ("\u{D}\u{AC00}", &["\u{D}", "\u{AC00}"]),
2402 ("\u{D}\u{308}\u{AC00}", &["\u{D}", "\u{308}", "\u{AC00}"]),
2403 ("\u{D}\u{AC01}", &["\u{D}", "\u{AC01}"]),
2404 ("\u{D}\u{308}\u{AC01}", &["\u{D}", "\u{308}", "\u{AC01}"]),
2405 ("\u{D}\u{1F1E6}", &["\u{D}", "\u{1F1E6}"]),
2406 ("\u{D}\u{308}\u{1F1E6}", &["\u{D}", "\u{308}", "\u{1F1E6}"]),
2407 ("\u{D}\u{378}", &["\u{D}", "\u{378}"]),
2408 ("\u{D}\u{308}\u{378}", &["\u{D}", "\u{308}", "\u{378}"]),
2409 ("\u{A}\u{20}", &["\u{A}", "\u{20}"]),
2410 ("\u{A}\u{308}\u{20}", &["\u{A}", "\u{308}", "\u{20}"]),
2411 ("\u{A}\u{D}", &["\u{A}", "\u{D}"]),
2412 ("\u{A}\u{308}\u{D}", &["\u{A}", "\u{308}", "\u{D}"]),
2413 ("\u{A}\u{A}", &["\u{A}", "\u{A}"]),
2414 ("\u{A}\u{308}\u{A}", &["\u{A}", "\u{308}", "\u{A}"]),
2415 ("\u{A}\u{1}", &["\u{A}", "\u{1}"]),
2416 ("\u{A}\u{308}\u{1}", &["\u{A}", "\u{308}", "\u{1}"]),
2417 ("\u{A}\u{300}", &["\u{A}", "\u{300}"]),
2418 ("\u{A}\u{308}\u{300}", &["\u{A}", "\u{308}\u{300}"]),
2419 ("\u{A}\u{903}", &["\u{A}", "\u{903}"]),
2420 ("\u{A}\u{1100}", &["\u{A}", "\u{1100}"]),
2421 ("\u{A}\u{308}\u{1100}", &["\u{A}", "\u{308}", "\u{1100}"]),
2422 ("\u{A}\u{1160}", &["\u{A}", "\u{1160}"]),
2423 ("\u{A}\u{308}\u{1160}", &["\u{A}", "\u{308}", "\u{1160}"]),
2424 ("\u{A}\u{11A8}", &["\u{A}", "\u{11A8}"]),
2425 ("\u{A}\u{308}\u{11A8}", &["\u{A}", "\u{308}", "\u{11A8}"]),
2426 ("\u{A}\u{AC00}", &["\u{A}", "\u{AC00}"]),
2427 ("\u{A}\u{308}\u{AC00}", &["\u{A}", "\u{308}", "\u{AC00}"]),
2428 ("\u{A}\u{AC01}", &["\u{A}", "\u{AC01}"]),
2429 ("\u{A}\u{308}\u{AC01}", &["\u{A}", "\u{308}", "\u{AC01}"]),
2430 ("\u{A}\u{1F1E6}", &["\u{A}", "\u{1F1E6}"]),
2431 ("\u{A}\u{308}\u{1F1E6}", &["\u{A}", "\u{308}", "\u{1F1E6}"]),
2432 ("\u{A}\u{378}", &["\u{A}", "\u{378}"]),
2433 ("\u{A}\u{308}\u{378}", &["\u{A}", "\u{308}", "\u{378}"]),
2434 ("\u{1}\u{20}", &["\u{1}", "\u{20}"]),
2435 ("\u{1}\u{308}\u{20}", &["\u{1}", "\u{308}", "\u{20}"]),
2436 ("\u{1}\u{D}", &["\u{1}", "\u{D}"]),
2437 ("\u{1}\u{308}\u{D}", &["\u{1}", "\u{308}", "\u{D}"]),
2438 ("\u{1}\u{A}", &["\u{1}", "\u{A}"]),
2439 ("\u{1}\u{308}\u{A}", &["\u{1}", "\u{308}", "\u{A}"]),
2440 ("\u{1}\u{1}", &["\u{1}", "\u{1}"]),
2441 ("\u{1}\u{308}\u{1}", &["\u{1}", "\u{308}", "\u{1}"]),
2442 ("\u{1}\u{300}", &["\u{1}", "\u{300}"]),
2443 ("\u{1}\u{308}\u{300}", &["\u{1}", "\u{308}\u{300}"]),
2444 ("\u{1}\u{903}", &["\u{1}", "\u{903}"]),
2445 ("\u{1}\u{1100}", &["\u{1}", "\u{1100}"]),
2446 ("\u{1}\u{308}\u{1100}", &["\u{1}", "\u{308}", "\u{1100}"]),
2447 ("\u{1}\u{1160}", &["\u{1}", "\u{1160}"]),
2448 ("\u{1}\u{308}\u{1160}", &["\u{1}", "\u{308}", "\u{1160}"]),
2449 ("\u{1}\u{11A8}", &["\u{1}", "\u{11A8}"]),
2450 ("\u{1}\u{308}\u{11A8}", &["\u{1}", "\u{308}", "\u{11A8}"]),
2451 ("\u{1}\u{AC00}", &["\u{1}", "\u{AC00}"]),
2452 ("\u{1}\u{308}\u{AC00}", &["\u{1}", "\u{308}", "\u{AC00}"]),
2453 ("\u{1}\u{AC01}", &["\u{1}", "\u{AC01}"]),
2454 ("\u{1}\u{308}\u{AC01}", &["\u{1}", "\u{308}", "\u{AC01}"]),
2455 ("\u{1}\u{1F1E6}", &["\u{1}", "\u{1F1E6}"]),
2456 ("\u{1}\u{308}\u{1F1E6}", &["\u{1}", "\u{308}", "\u{1F1E6}"]),
2457 ("\u{1}\u{378}", &["\u{1}", "\u{378}"]),
2458 ("\u{1}\u{308}\u{378}", &["\u{1}", "\u{308}", "\u{378}"]),
2459 ("\u{300}\u{20}", &["\u{300}", "\u{20}"]),
2460 ("\u{300}\u{308}\u{20}", &["\u{300}\u{308}", "\u{20}"]),
2461 ("\u{300}\u{D}", &["\u{300}", "\u{D}"]),
2462 ("\u{300}\u{308}\u{D}", &["\u{300}\u{308}", "\u{D}"]),
2463 ("\u{300}\u{A}", &["\u{300}", "\u{A}"]),
2464 ("\u{300}\u{308}\u{A}", &["\u{300}\u{308}", "\u{A}"]),
2465 ("\u{300}\u{1}", &["\u{300}", "\u{1}"]),
2466 ("\u{300}\u{308}\u{1}", &["\u{300}\u{308}", "\u{1}"]),
2467 ("\u{300}\u{300}", &["\u{300}\u{300}"]),
2468 ("\u{300}\u{308}\u{300}", &["\u{300}\u{308}\u{300}"]),
2469 ("\u{300}\u{1100}", &["\u{300}", "\u{1100}"]),
2470 ("\u{300}\u{308}\u{1100}", &["\u{300}\u{308}", "\u{1100}"]),
2471 ("\u{300}\u{1160}", &["\u{300}", "\u{1160}"]),
2472 ("\u{300}\u{308}\u{1160}", &["\u{300}\u{308}", "\u{1160}"]),
2473 ("\u{300}\u{11A8}", &["\u{300}", "\u{11A8}"]),
2474 ("\u{300}\u{308}\u{11A8}", &["\u{300}\u{308}", "\u{11A8}"]),
2475 ("\u{300}\u{AC00}", &["\u{300}", "\u{AC00}"]),
2476 ("\u{300}\u{308}\u{AC00}", &["\u{300}\u{308}", "\u{AC00}"]),
2477 ("\u{300}\u{AC01}", &["\u{300}", "\u{AC01}"]),
2478 ("\u{300}\u{308}\u{AC01}", &["\u{300}\u{308}", "\u{AC01}"]),
2479 ("\u{300}\u{1F1E6}", &["\u{300}", "\u{1F1E6}"]),
2480 ("\u{300}\u{308}\u{1F1E6}", &["\u{300}\u{308}", "\u{1F1E6}"]),
2481 ("\u{300}\u{378}", &["\u{300}", "\u{378}"]),
2482 ("\u{300}\u{308}\u{378}", &["\u{300}\u{308}", "\u{378}"]),
2483 ("\u{903}\u{20}", &["\u{903}", "\u{20}"]),
2484 ("\u{903}\u{308}\u{20}", &["\u{903}\u{308}", "\u{20}"]),
2485 ("\u{903}\u{D}", &["\u{903}", "\u{D}"]),
2486 ("\u{903}\u{308}\u{D}", &["\u{903}\u{308}", "\u{D}"]),
2487 ("\u{903}\u{A}", &["\u{903}", "\u{A}"]),
2488 ("\u{903}\u{308}\u{A}", &["\u{903}\u{308}", "\u{A}"]),
2489 ("\u{903}\u{1}", &["\u{903}", "\u{1}"]),
2490 ("\u{903}\u{308}\u{1}", &["\u{903}\u{308}", "\u{1}"]),
2491 ("\u{903}\u{300}", &["\u{903}\u{300}"]),
2492 ("\u{903}\u{308}\u{300}", &["\u{903}\u{308}\u{300}"]),
2493 ("\u{903}\u{1100}", &["\u{903}", "\u{1100}"]),
2494 ("\u{903}\u{308}\u{1100}", &["\u{903}\u{308}", "\u{1100}"]),
2495 ("\u{903}\u{1160}", &["\u{903}", "\u{1160}"]),
2496 ("\u{903}\u{308}\u{1160}", &["\u{903}\u{308}", "\u{1160}"]),
2497 ("\u{903}\u{11A8}", &["\u{903}", "\u{11A8}"]),
2498 ("\u{903}\u{308}\u{11A8}", &["\u{903}\u{308}", "\u{11A8}"]),
2499 ("\u{903}\u{AC00}", &["\u{903}", "\u{AC00}"]),
2500 ("\u{903}\u{308}\u{AC00}", &["\u{903}\u{308}", "\u{AC00}"]),
2501 ("\u{903}\u{AC01}", &["\u{903}", "\u{AC01}"]),
2502 ("\u{903}\u{308}\u{AC01}", &["\u{903}\u{308}", "\u{AC01}"]),
2503 ("\u{903}\u{1F1E6}", &["\u{903}", "\u{1F1E6}"]),
2504 ("\u{903}\u{308}\u{1F1E6}", &["\u{903}\u{308}", "\u{1F1E6}"]),
2505 ("\u{903}\u{378}", &["\u{903}", "\u{378}"]),
2506 ("\u{903}\u{308}\u{378}", &["\u{903}\u{308}", "\u{378}"]),
2507 ("\u{1100}\u{20}", &["\u{1100}", "\u{20}"]),
2508 ("\u{1100}\u{308}\u{20}", &["\u{1100}\u{308}", "\u{20}"]),
2509 ("\u{1100}\u{D}", &["\u{1100}", "\u{D}"]),
2510 ("\u{1100}\u{308}\u{D}", &["\u{1100}\u{308}", "\u{D}"]),
2511 ("\u{1100}\u{A}", &["\u{1100}", "\u{A}"]),
2512 ("\u{1100}\u{308}\u{A}", &["\u{1100}\u{308}", "\u{A}"]),
2513 ("\u{1100}\u{1}", &["\u{1100}", "\u{1}"]),
2514 ("\u{1100}\u{308}\u{1}", &["\u{1100}\u{308}", "\u{1}"]),
2515 ("\u{1100}\u{300}", &["\u{1100}\u{300}"]),
2516 ("\u{1100}\u{308}\u{300}", &["\u{1100}\u{308}\u{300}"]),
2517 ("\u{1100}\u{1100}", &["\u{1100}\u{1100}"]),
2518 ("\u{1100}\u{308}\u{1100}", &["\u{1100}\u{308}", "\u{1100}"]),
2519 ("\u{1100}\u{1160}", &["\u{1100}\u{1160}"]),
2520 ("\u{1100}\u{308}\u{1160}", &["\u{1100}\u{308}", "\u{1160}"]),
2521 ("\u{1100}\u{11A8}", &["\u{1100}", "\u{11A8}"]),
2522 ("\u{1100}\u{308}\u{11A8}", &["\u{1100}\u{308}", "\u{11A8}"]),
2523 ("\u{1100}\u{AC00}", &["\u{1100}\u{AC00}"]),
2524 ("\u{1100}\u{308}\u{AC00}", &["\u{1100}\u{308}", "\u{AC00}"]),
2525 ("\u{1100}\u{AC01}", &["\u{1100}\u{AC01}"]),
2526 ("\u{1100}\u{308}\u{AC01}", &["\u{1100}\u{308}", "\u{AC01}"]),
2527 ("\u{1100}\u{1F1E6}", &["\u{1100}", "\u{1F1E6}"]),
2528 ("\u{1100}\u{308}\u{1F1E6}", &["\u{1100}\u{308}", "\u{1F1E6}"]),
2529 ("\u{1100}\u{378}", &["\u{1100}", "\u{378}"]),
2530 ("\u{1100}\u{308}\u{378}", &["\u{1100}\u{308}", "\u{378}"]),
2531 ("\u{1160}\u{20}", &["\u{1160}", "\u{20}"]),
2532 ("\u{1160}\u{308}\u{20}", &["\u{1160}\u{308}", "\u{20}"]),
2533 ("\u{1160}\u{D}", &["\u{1160}", "\u{D}"]),
2534 ("\u{1160}\u{308}\u{D}", &["\u{1160}\u{308}", "\u{D}"]),
2535 ("\u{1160}\u{A}", &["\u{1160}", "\u{A}"]),
2536 ("\u{1160}\u{308}\u{A}", &["\u{1160}\u{308}", "\u{A}"]),
2537 ("\u{1160}\u{1}", &["\u{1160}", "\u{1}"]),
2538 ("\u{1160}\u{308}\u{1}", &["\u{1160}\u{308}", "\u{1}"]),
2539 ("\u{1160}\u{300}", &["\u{1160}\u{300}"]),
2540 ("\u{1160}\u{308}\u{300}", &["\u{1160}\u{308}\u{300}"]),
2541 ("\u{1160}\u{1100}", &["\u{1160}", "\u{1100}"]),
2542 ("\u{1160}\u{308}\u{1100}", &["\u{1160}\u{308}", "\u{1100}"]),
2543 ("\u{1160}\u{1160}", &["\u{1160}\u{1160}"]),
2544 ("\u{1160}\u{308}\u{1160}", &["\u{1160}\u{308}", "\u{1160}"]),
2545 ("\u{1160}\u{11A8}", &["\u{1160}\u{11A8}"]),
2546 ("\u{1160}\u{308}\u{11A8}", &["\u{1160}\u{308}", "\u{11A8}"]),
2547 ("\u{1160}\u{AC00}", &["\u{1160}", "\u{AC00}"]),
2548 ("\u{1160}\u{308}\u{AC00}", &["\u{1160}\u{308}", "\u{AC00}"]),
2549 ("\u{1160}\u{AC01}", &["\u{1160}", "\u{AC01}"]),
2550 ("\u{1160}\u{308}\u{AC01}", &["\u{1160}\u{308}", "\u{AC01}"]),
2551 ("\u{1160}\u{1F1E6}", &["\u{1160}", "\u{1F1E6}"]),
2552 ("\u{1160}\u{308}\u{1F1E6}", &["\u{1160}\u{308}", "\u{1F1E6}"]),
2553 ("\u{1160}\u{378}", &["\u{1160}", "\u{378}"]),
2554 ("\u{1160}\u{308}\u{378}", &["\u{1160}\u{308}", "\u{378}"]),
2555 ("\u{11A8}\u{20}", &["\u{11A8}", "\u{20}"]),
2556 ("\u{11A8}\u{308}\u{20}", &["\u{11A8}\u{308}", "\u{20}"]),
2557 ("\u{11A8}\u{D}", &["\u{11A8}", "\u{D}"]),
2558 ("\u{11A8}\u{308}\u{D}", &["\u{11A8}\u{308}", "\u{D}"]),
2559 ("\u{11A8}\u{A}", &["\u{11A8}", "\u{A}"]),
2560 ("\u{11A8}\u{308}\u{A}", &["\u{11A8}\u{308}", "\u{A}"]),
2561 ("\u{11A8}\u{1}", &["\u{11A8}", "\u{1}"]),
2562 ("\u{11A8}\u{308}\u{1}", &["\u{11A8}\u{308}", "\u{1}"]),
2563 ("\u{11A8}\u{300}", &["\u{11A8}\u{300}"]),
2564 ("\u{11A8}\u{308}\u{300}", &["\u{11A8}\u{308}\u{300}"]),
2565 ("\u{11A8}\u{1100}", &["\u{11A8}", "\u{1100}"]),
2566 ("\u{11A8}\u{308}\u{1100}", &["\u{11A8}\u{308}", "\u{1100}"]),
2567 ("\u{11A8}\u{1160}", &["\u{11A8}", "\u{1160}"]),
2568 ("\u{11A8}\u{308}\u{1160}", &["\u{11A8}\u{308}", "\u{1160}"]),
2569 ("\u{11A8}\u{11A8}", &["\u{11A8}\u{11A8}"]),
2570 ("\u{11A8}\u{308}\u{11A8}", &["\u{11A8}\u{308}", "\u{11A8}"]),
2571 ("\u{11A8}\u{AC00}", &["\u{11A8}", "\u{AC00}"]),
2572 ("\u{11A8}\u{308}\u{AC00}", &["\u{11A8}\u{308}", "\u{AC00}"]),
2573 ("\u{11A8}\u{AC01}", &["\u{11A8}", "\u{AC01}"]),
2574 ("\u{11A8}\u{308}\u{AC01}", &["\u{11A8}\u{308}", "\u{AC01}"]),
2575 ("\u{11A8}\u{1F1E6}", &["\u{11A8}", "\u{1F1E6}"]),
2576 ("\u{11A8}\u{308}\u{1F1E6}", &["\u{11A8}\u{308}", "\u{1F1E6}"]),
2577 ("\u{11A8}\u{378}", &["\u{11A8}", "\u{378}"]),
2578 ("\u{11A8}\u{308}\u{378}", &["\u{11A8}\u{308}", "\u{378}"]),
2579 ("\u{AC00}\u{20}", &["\u{AC00}", "\u{20}"]),
2580 ("\u{AC00}\u{308}\u{20}", &["\u{AC00}\u{308}", "\u{20}"]),
2581 ("\u{AC00}\u{D}", &["\u{AC00}", "\u{D}"]),
2582 ("\u{AC00}\u{308}\u{D}", &["\u{AC00}\u{308}", "\u{D}"]),
2583 ("\u{AC00}\u{A}", &["\u{AC00}", "\u{A}"]),
2584 ("\u{AC00}\u{308}\u{A}", &["\u{AC00}\u{308}", "\u{A}"]),
2585 ("\u{AC00}\u{1}", &["\u{AC00}", "\u{1}"]),
2586 ("\u{AC00}\u{308}\u{1}", &["\u{AC00}\u{308}", "\u{1}"]),
2587 ("\u{AC00}\u{300}", &["\u{AC00}\u{300}"]),
2588 ("\u{AC00}\u{308}\u{300}", &["\u{AC00}\u{308}\u{300}"]),
2589 ("\u{AC00}\u{1100}", &["\u{AC00}", "\u{1100}"]),
2590 ("\u{AC00}\u{308}\u{1100}", &["\u{AC00}\u{308}", "\u{1100}"]),
2591 ("\u{AC00}\u{1160}", &["\u{AC00}\u{1160}"]),
2592 ("\u{AC00}\u{308}\u{1160}", &["\u{AC00}\u{308}", "\u{1160}"]),
2593 ("\u{AC00}\u{11A8}", &["\u{AC00}\u{11A8}"]),
2594 ("\u{AC00}\u{308}\u{11A8}", &["\u{AC00}\u{308}", "\u{11A8}"]),
2595 ("\u{AC00}\u{AC00}", &["\u{AC00}", "\u{AC00}"]),
2596 ("\u{AC00}\u{308}\u{AC00}", &["\u{AC00}\u{308}", "\u{AC00}"]),
2597 ("\u{AC00}\u{AC01}", &["\u{AC00}", "\u{AC01}"]),
2598 ("\u{AC00}\u{308}\u{AC01}", &["\u{AC00}\u{308}", "\u{AC01}"]),
2599 ("\u{AC00}\u{1F1E6}", &["\u{AC00}", "\u{1F1E6}"]),
2600 ("\u{AC00}\u{308}\u{1F1E6}", &["\u{AC00}\u{308}", "\u{1F1E6}"]),
2601 ("\u{AC00}\u{378}", &["\u{AC00}", "\u{378}"]),
2602 ("\u{AC00}\u{308}\u{378}", &["\u{AC00}\u{308}", "\u{378}"]),
2603 ("\u{AC01}\u{20}", &["\u{AC01}", "\u{20}"]),
2604 ("\u{AC01}\u{308}\u{20}", &["\u{AC01}\u{308}", "\u{20}"]),
2605 ("\u{AC01}\u{D}", &["\u{AC01}", "\u{D}"]),
2606 ("\u{AC01}\u{308}\u{D}", &["\u{AC01}\u{308}", "\u{D}"]),
2607 ("\u{AC01}\u{A}", &["\u{AC01}", "\u{A}"]),
2608 ("\u{AC01}\u{308}\u{A}", &["\u{AC01}\u{308}", "\u{A}"]),
2609 ("\u{AC01}\u{1}", &["\u{AC01}", "\u{1}"]),
2610 ("\u{AC01}\u{308}\u{1}", &["\u{AC01}\u{308}", "\u{1}"]),
2611 ("\u{AC01}\u{300}", &["\u{AC01}\u{300}"]),
2612 ("\u{AC01}\u{308}\u{300}", &["\u{AC01}\u{308}\u{300}"]),
2613 ("\u{AC01}\u{1100}", &["\u{AC01}", "\u{1100}"]),
2614 ("\u{AC01}\u{308}\u{1100}", &["\u{AC01}\u{308}", "\u{1100}"]),
2615 ("\u{AC01}\u{1160}", &["\u{AC01}", "\u{1160}"]),
2616 ("\u{AC01}\u{308}\u{1160}", &["\u{AC01}\u{308}", "\u{1160}"]),
2617 ("\u{AC01}\u{11A8}", &["\u{AC01}\u{11A8}"]),
2618 ("\u{AC01}\u{308}\u{11A8}", &["\u{AC01}\u{308}", "\u{11A8}"]),
2619 ("\u{AC01}\u{AC00}", &["\u{AC01}", "\u{AC00}"]),
2620 ("\u{AC01}\u{308}\u{AC00}", &["\u{AC01}\u{308}", "\u{AC00}"]),
2621 ("\u{AC01}\u{AC01}", &["\u{AC01}", "\u{AC01}"]),
2622 ("\u{AC01}\u{308}\u{AC01}", &["\u{AC01}\u{308}", "\u{AC01}"]),
2623 ("\u{AC01}\u{1F1E6}", &["\u{AC01}", "\u{1F1E6}"]),
2624 ("\u{AC01}\u{308}\u{1F1E6}", &["\u{AC01}\u{308}", "\u{1F1E6}"]),
2625 ("\u{AC01}\u{378}", &["\u{AC01}", "\u{378}"]),
2626 ("\u{AC01}\u{308}\u{378}", &["\u{AC01}\u{308}", "\u{378}"]),
2627 ("\u{1F1E6}\u{20}", &["\u{1F1E6}", "\u{20}"]),
2628 ("\u{1F1E6}\u{308}\u{20}", &["\u{1F1E6}\u{308}", "\u{20}"]),
2629 ("\u{1F1E6}\u{D}", &["\u{1F1E6}", "\u{D}"]),
2630 ("\u{1F1E6}\u{308}\u{D}", &["\u{1F1E6}\u{308}", "\u{D}"]),
2631 ("\u{1F1E6}\u{A}", &["\u{1F1E6}", "\u{A}"]),
2632 ("\u{1F1E6}\u{308}\u{A}", &["\u{1F1E6}\u{308}", "\u{A}"]),
2633 ("\u{1F1E6}\u{1}", &["\u{1F1E6}", "\u{1}"]),
2634 ("\u{1F1E6}\u{308}\u{1}", &["\u{1F1E6}\u{308}", "\u{1}"]),
2635 ("\u{1F1E6}\u{300}", &["\u{1F1E6}\u{300}"]),
2636 ("\u{1F1E6}\u{308}\u{300}", &["\u{1F1E6}\u{308}\u{300}"]),
2637 ("\u{1F1E6}\u{1100}", &["\u{1F1E6}", "\u{1100}"]),
2638 ("\u{1F1E6}\u{308}\u{1100}", &["\u{1F1E6}\u{308}", "\u{1100}"]),
2639 ("\u{1F1E6}\u{1160}", &["\u{1F1E6}", "\u{1160}"]),
2640 ("\u{1F1E6}\u{308}\u{1160}", &["\u{1F1E6}\u{308}", "\u{1160}"]),
2641 ("\u{1F1E6}\u{11A8}", &["\u{1F1E6}", "\u{11A8}"]),
2642 ("\u{1F1E6}\u{308}\u{11A8}", &["\u{1F1E6}\u{308}", "\u{11A8}"]),
2643 ("\u{1F1E6}\u{AC00}", &["\u{1F1E6}", "\u{AC00}"]),
2644 ("\u{1F1E6}\u{308}\u{AC00}", &["\u{1F1E6}\u{308}", "\u{AC00}"]),
2645 ("\u{1F1E6}\u{AC01}", &["\u{1F1E6}", "\u{AC01}"]),
2646 ("\u{1F1E6}\u{308}\u{AC01}", &["\u{1F1E6}\u{308}", "\u{AC01}"]),
2647 ("\u{1F1E6}\u{1F1E6}", &["\u{1F1E6}\u{1F1E6}"]),
2648 ("\u{1F1E6}\u{308}\u{1F1E6}", &["\u{1F1E6}\u{308}", "\u{1F1E6}"]),
2649 ("\u{1F1E6}\u{378}", &["\u{1F1E6}", "\u{378}"]),
2650 ("\u{1F1E6}\u{308}\u{378}", &["\u{1F1E6}\u{308}", "\u{378}"]),
2651 ("\u{378}\u{20}", &["\u{378}", "\u{20}"]),
2652 ("\u{378}\u{308}\u{20}", &["\u{378}\u{308}", "\u{20}"]),
2653 ("\u{378}\u{D}", &["\u{378}", "\u{D}"]),
2654 ("\u{378}\u{308}\u{D}", &["\u{378}\u{308}", "\u{D}"]),
2655 ("\u{378}\u{A}", &["\u{378}", "\u{A}"]),
2656 ("\u{378}\u{308}\u{A}", &["\u{378}\u{308}", "\u{A}"]),
2657 ("\u{378}\u{1}", &["\u{378}", "\u{1}"]),
2658 ("\u{378}\u{308}\u{1}", &["\u{378}\u{308}", "\u{1}"]),
2659 ("\u{378}\u{300}", &["\u{378}\u{300}"]),
2660 ("\u{378}\u{308}\u{300}", &["\u{378}\u{308}\u{300}"]),
2661 ("\u{378}\u{1100}", &["\u{378}", "\u{1100}"]),
2662 ("\u{378}\u{308}\u{1100}", &["\u{378}\u{308}", "\u{1100}"]),
2663 ("\u{378}\u{1160}", &["\u{378}", "\u{1160}"]),
2664 ("\u{378}\u{308}\u{1160}", &["\u{378}\u{308}", "\u{1160}"]),
2665 ("\u{378}\u{11A8}", &["\u{378}", "\u{11A8}"]),
2666 ("\u{378}\u{308}\u{11A8}", &["\u{378}\u{308}", "\u{11A8}"]),
2667 ("\u{378}\u{AC00}", &["\u{378}", "\u{AC00}"]),
2668 ("\u{378}\u{308}\u{AC00}", &["\u{378}\u{308}", "\u{AC00}"]),
2669 ("\u{378}\u{AC01}", &["\u{378}", "\u{AC01}"]),
2670 ("\u{378}\u{308}\u{AC01}", &["\u{378}\u{308}", "\u{AC01}"]),
2671 ("\u{378}\u{1F1E6}", &["\u{378}", "\u{1F1E6}"]),
2672 ("\u{378}\u{308}\u{1F1E6}", &["\u{378}\u{308}", "\u{1F1E6}"]),
2673 ("\u{378}\u{378}", &["\u{378}", "\u{378}"]),
2674 ("\u{378}\u{308}\u{378}", &["\u{378}\u{308}", "\u{378}"]),
2675 ("\u{61}\u{1F1E6}\u{62}", &["\u{61}", "\u{1F1E6}", "\u{62}"]),
2676 ("\u{1F1F7}\u{1F1FA}", &["\u{1F1F7}\u{1F1FA}"]),
2677 ("\u{1F1F7}\u{1F1FA}\u{1F1F8}", &["\u{1F1F7}\u{1F1FA}\u{1F1F8}"]),
2678 ("\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}",
2679 &["\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}"]),
2680 ("\u{1F1F7}\u{1F1FA}\u{200B}\u{1F1F8}\u{1F1EA}",
2681 &["\u{1F1F7}\u{1F1FA}", "\u{200B}", "\u{1F1F8}\u{1F1EA}"]),
2682 ("\u{1F1E6}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{1F1E7}\u{1F1E8}"]),
2683 ("\u{1F1E6}\u{200D}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{200D}",
2684 "\u{1F1E7}\u{1F1E8}"]),
2685 ("\u{1F1E6}\u{1F1E7}\u{200D}\u{1F1E8}",
2686 &["\u{1F1E6}\u{1F1E7}\u{200D}", "\u{1F1E8}"]),
2687 ("\u{20}\u{200D}\u{646}", &["\u{20}\u{200D}", "\u{646}"]),
2688 ("\u{646}\u{200D}\u{20}", &["\u{646}\u{200D}", "\u{20}"]),
2691 let test_diff: [(_, &[_], &[_]); 23] = [
2692 ("\u{20}\u{903}", &["\u{20}\u{903}"], &["\u{20}", "\u{903}"]), ("\u{20}\u{308}\u{903}",
2693 &["\u{20}\u{308}\u{903}"], &["\u{20}\u{308}", "\u{903}"]), ("\u{D}\u{308}\u{903}",
2694 &["\u{D}", "\u{308}\u{903}"], &["\u{D}", "\u{308}", "\u{903}"]), ("\u{A}\u{308}\u{903}",
2695 &["\u{A}", "\u{308}\u{903}"], &["\u{A}", "\u{308}", "\u{903}"]), ("\u{1}\u{308}\u{903}",
2696 &["\u{1}", "\u{308}\u{903}"], &["\u{1}", "\u{308}", "\u{903}"]), ("\u{300}\u{903}",
2697 &["\u{300}\u{903}"], &["\u{300}", "\u{903}"]), ("\u{300}\u{308}\u{903}",
2698 &["\u{300}\u{308}\u{903}"], &["\u{300}\u{308}", "\u{903}"]), ("\u{903}\u{903}",
2699 &["\u{903}\u{903}"], &["\u{903}", "\u{903}"]), ("\u{903}\u{308}\u{903}",
2700 &["\u{903}\u{308}\u{903}"], &["\u{903}\u{308}", "\u{903}"]), ("\u{1100}\u{903}",
2701 &["\u{1100}\u{903}"], &["\u{1100}", "\u{903}"]), ("\u{1100}\u{308}\u{903}",
2702 &["\u{1100}\u{308}\u{903}"], &["\u{1100}\u{308}", "\u{903}"]), ("\u{1160}\u{903}",
2703 &["\u{1160}\u{903}"], &["\u{1160}", "\u{903}"]), ("\u{1160}\u{308}\u{903}",
2704 &["\u{1160}\u{308}\u{903}"], &["\u{1160}\u{308}", "\u{903}"]), ("\u{11A8}\u{903}",
2705 &["\u{11A8}\u{903}"], &["\u{11A8}", "\u{903}"]), ("\u{11A8}\u{308}\u{903}",
2706 &["\u{11A8}\u{308}\u{903}"], &["\u{11A8}\u{308}", "\u{903}"]), ("\u{AC00}\u{903}",
2707 &["\u{AC00}\u{903}"], &["\u{AC00}", "\u{903}"]), ("\u{AC00}\u{308}\u{903}",
2708 &["\u{AC00}\u{308}\u{903}"], &["\u{AC00}\u{308}", "\u{903}"]), ("\u{AC01}\u{903}",
2709 &["\u{AC01}\u{903}"], &["\u{AC01}", "\u{903}"]), ("\u{AC01}\u{308}\u{903}",
2710 &["\u{AC01}\u{308}\u{903}"], &["\u{AC01}\u{308}", "\u{903}"]), ("\u{1F1E6}\u{903}",
2711 &["\u{1F1E6}\u{903}"], &["\u{1F1E6}", "\u{903}"]), ("\u{1F1E6}\u{308}\u{903}",
2712 &["\u{1F1E6}\u{308}\u{903}"], &["\u{1F1E6}\u{308}", "\u{903}"]), ("\u{378}\u{903}",
2713 &["\u{378}\u{903}"], &["\u{378}", "\u{903}"]), ("\u{378}\u{308}\u{903}",
2714 &["\u{378}\u{308}\u{903}"], &["\u{378}\u{308}", "\u{903}"]),
2717 for &(s, g) in test_same.iter() {
2718 // test forward iterator
2719 assert!(order::equals(s.graphemes(true), g.iter().map(|&x| x)));
2720 assert!(order::equals(s.graphemes(false), g.iter().map(|&x| x)));
2722 // test reverse iterator
2723 assert!(order::equals(s.graphemes(true).rev(), g.iter().rev().map(|&x| x)));
2724 assert!(order::equals(s.graphemes(false).rev(), g.iter().rev().map(|&x| x)));
2727 for &(s, gt, gf) in test_diff.iter() {
2728 // test forward iterator
2729 assert!(order::equals(s.graphemes(true), gt.iter().map(|&x| x)));
2730 assert!(order::equals(s.graphemes(false), gf.iter().map(|&x| x)));
2732 // test reverse iterator
2733 assert!(order::equals(s.graphemes(true).rev(), gt.iter().rev().map(|&x| x)));
2734 assert!(order::equals(s.graphemes(false).rev(), gf.iter().rev().map(|&x| x)));
2737 // test the indices iterators
2738 let s = "a̐éö̲\r\n";
2739 let gr_inds = s.grapheme_indices(true).collect::<Vec<(uint, &str)>>();
2740 let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
2741 assert_eq!(gr_inds, b);
2742 let gr_inds = s.grapheme_indices(true).rev().collect::<Vec<(uint, &str)>>();
2743 let b: &[_] = &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0u, "a̐")];
2744 assert_eq!(gr_inds, b);
2745 let mut gr_inds_iter = s.grapheme_indices(true);
2747 let gr_inds = gr_inds_iter.by_ref();
2748 let e1 = gr_inds.size_hint();
2749 assert_eq!(e1, (1, Some(13)));
2750 let c = gr_inds.count();
2753 let e2 = gr_inds_iter.size_hint();
2754 assert_eq!(e2, (0, Some(0)));
2756 // make sure the reverse iterator does the right thing with "\n" at beginning of string
2758 let gr = s.graphemes(true).rev().collect::<Vec<&str>>();
2759 let b: &[_] = &["\r", "\r\n", "\n"];
// Table-driven check of `split_str`: each `t(s, sep, u)` call splits `s` on the
// string separator `sep`, and `u` lists the pieces expected back, in order.
// NOTE(review): the statement that compares `v` with `u` is not visible in
// this listing -- confirm against the full file.
2764 fn test_split_strator() {
// Helper: gather every piece yielded by `s.split_str(sep)` into a Vec.
2765 fn t(s: &str, sep: &str, u: &[&str]) {
2766 let v: Vec<&str> = s.split_str(sep).collect();
// Separator does not occur in the input: the whole input is the single piece.
2769 t("--1233345--", "12345", &["--1233345--"]);
// Separator in the middle, at the start, at the end, and at both ends.
// A leading or trailing separator is expected to produce an empty piece.
2770 t("abc::hello::there", "::", &["abc", "hello", "there"]);
2771 t("::hello::there", "::", &["", "hello", "there"]);
2772 t("hello::there::", "::", &["hello", "there", ""]);
2773 t("::hello::there::", "::", &["", "hello", "there", ""]);
// Non-ASCII input with a multi-byte separator.
2774 t("ประเทศไทย中华Việt Nam", "中华", &["ประเทศไทย", "Việt Nam"]);
2775 t("zzXXXzzYYYzz", "zz", &["", "XXX", "YYY", ""]);
2776 t("zzXXXzYYYz", "XXX", &["zz", "zYYYz"]);
2777 t(".XXX.YYY.", ".", &["", "XXX", "YYY", ""]);
// Degenerate inputs: input equal to the separator, separator absent, and
// odd-length runs of `z` split on `zz`, where a single `z` is expected to
// be left over as the final piece.
2779 t("zz", "zz", &["",""]);
2780 t("ok", "z", &["ok"]);
2781 t("zzz", "zz", &["","z"]);
2782 t("zzzzz", "zz", &["","","z"]);
// Checks that the `Default` value of a string type views as the empty string.
// NOTE(review): the calls that instantiate the generic helper `t` are not
// visible in this listing -- confirm which types are exercised.
2786 fn test_str_default() {
2787 use core::default::Default;
// Generic helper: works for any type that is both `Default` and `Str`.
2788 fn t<S: Default + Str>() {
// Build the default value, then compare its slice view with the empty string.
2789 let s: S = Default::default();
2790 assert_eq!(s.as_slice(), "");
// Checks that `len()` summed over a slice of `&str` gives the expected total
// of 5 for three kinds of input: string literals, slices borrowed from
// temporary `String` values, and a slice borrowed from a named `String`.
2798 fn test_str_container() {
// Helper: total of `len()` over every string in `v`.
2799 fn sum_len(v: &[&str]) -> uint {
2800 v.iter().map(|x| x.len()).sum()
2803 let s = String::from_str("01234");
// Literals, including an empty one that must contribute nothing.
2804 assert_eq!(5, sum_len(&["012", "", "34"]));
// Slices borrowed from temporaries created inside the array expression.
2805 assert_eq!(5, sum_len(&[String::from_str("01").as_slice(),
2806 String::from_str("2").as_slice(),
2807 String::from_str("34").as_slice(),
2808 String::from_str("").as_slice()]));
// A single slice covering the whole named `String`.
2809 assert_eq!(5, sum_len(&[s.as_slice()]));
// Checks `from_utf8` on well-formed and malformed byte input.
// NOTE(review): the binding of `xs` used by the first assertion is not
// visible in this listing -- confirm against the full file.
2813 fn test_str_from_utf8() {
2815 assert_eq!(from_utf8(xs), Ok("hello"));
// Multi-byte text: the bytes of a `&str` must decode back to the same text.
2817 let xs = "ศไทย中华Việt Nam".as_bytes();
2818 assert_eq!(from_utf8(xs), Ok("ศไทย中华Việt Nam"));
// Input ending in the byte 0xFF is expected to be rejected, and the error
// reported is `Utf8Error::TooShort`.
2820 let xs = b"hello\xFF";
2821 assert_eq!(from_utf8(xs), Err(Utf8Error::TooShort));
2828 use prelude::{SliceExt, IteratorExt, SliceConcatExt};
2830 use test::black_box;
// Benchmark: forward `chars()` iteration over text that mixes multi-byte and
// ASCII characters, consumed with `count()`.
2833 fn char_iterator(b: &mut Bencher) {
2834 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2836 b.iter(|| s.chars().count());
// Benchmark: forward `chars()` iteration driven by an explicit `for` loop.
// Each char is handed to `black_box` so the loop body uses the value.
// NOTE(review): the `b.iter(..)` call that presumably wraps the loop is not
// visible in this listing -- confirm against the full file.
2840 fn char_iterator_for(b: &mut Bencher) {
2841 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2844 for ch in s.chars() { black_box(ch); }
// Benchmark: forward `chars()` iteration over an ASCII-only, multi-line
// string literal, consumed with `count()`.
2849 fn char_iterator_ascii(b: &mut Bencher) {
2850 let s = "Mary had a little lamb, Little lamb
2851 Mary had a little lamb, Little lamb
2852 Mary had a little lamb, Little lamb
2853 Mary had a little lamb, Little lamb
2854 Mary had a little lamb, Little lamb
2855 Mary had a little lamb, Little lamb";
2857 b.iter(|| s.chars().count());
// Benchmark: reverse `chars().rev()` iteration over text that mixes
// multi-byte and ASCII characters, consumed with `count()`.
2861 fn char_iterator_rev(b: &mut Bencher) {
2862 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2864 b.iter(|| s.chars().rev().count());
// Benchmark: reverse `chars().rev()` iteration driven by an explicit `for`
// loop. Each char is handed to `black_box` so the loop body uses the value.
// NOTE(review): the `b.iter(..)` call that presumably wraps the loop is not
// visible in this listing -- confirm against the full file.
2868 fn char_iterator_rev_for(b: &mut Bencher) {
2869 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2872 for ch in s.chars().rev() { black_box(ch); }
// Benchmark: forward `char_indices()` iteration. The expected item count is
// computed once from `chars().count()` outside the timed closure, and every
// run asserts that `char_indices()` yields the same number of items.
2877 fn char_indicesator(b: &mut Bencher) {
2878 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2879 let len = s.chars().count();
2881 b.iter(|| assert_eq!(s.char_indices().count(), len));
// Benchmark: reverse `char_indices().rev()` iteration. The expected item count
// is computed once from `chars().count()` outside the timed closure, and every
// run asserts that the reversed iterator yields the same number of items.
2885 fn char_indicesator_rev(b: &mut Bencher) {
2886 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2887 let len = s.chars().count();
2889 b.iter(|| assert_eq!(s.char_indices().rev().count(), len));
// Benchmark: `split` on an ASCII `char` separator over mostly non-ASCII text.
// The letter `V` occurs twice in the input, so three pieces are expected.
2893 fn split_unicode_ascii(b: &mut Bencher) {
2894 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2896 b.iter(|| assert_eq!(s.split('V').count(), 3));
// Benchmark: `split` over mostly non-ASCII text using a custom `CharEq`
// matcher whose `only_ascii` reports false. Three pieces are expected, the
// same count the plain `char` separator benchmark asserts for this input.
2900 fn split_unicode_not_ascii(b: &mut Bencher) {
// Wrapper around the separator char that implements `CharEq` by hand.
2901 struct NotAscii(char);
2902 impl CharEq for NotAscii {
// NOTE(review): the comparison that follows the destructuring is not visible
// in this listing -- presumably it tests `c` against the wrapped char.
2903 fn matches(&mut self, c: char) -> bool {
2904 let NotAscii(cc) = *self;
// Always false, even though the wrapped char here is ASCII. Presumably this
// keeps `split` off any ASCII-only path -- confirm against the `CharEq` users.
2907 fn only_ascii(&self) -> bool { false }
2909 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2911 b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
// Benchmark: `split` on a space character over ASCII text. The expected piece
// count is computed once outside the timed closure and asserted on every run.
2916 fn split_ascii(b: &mut Bencher) {
2917 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2918 let len = s.split(' ').count();
2920 b.iter(|| assert_eq!(s.split(' ').count(), len));
// Benchmark: `split` over ASCII text using a custom `CharEq` matcher whose
// `only_ascii` reports false. The expected piece count comes from splitting
// on a plain space char, so the custom matcher must agree with it.
2924 fn split_not_ascii(b: &mut Bencher) {
// Wrapper around the separator char that implements `CharEq` by hand.
2925 struct NotAscii(char);
2926 impl CharEq for NotAscii {
// NOTE(review): the comparison that follows the destructuring is not visible
// in this listing -- presumably it tests `c` against the wrapped char.
2928 fn matches(&mut self, c: char) -> bool {
2929 let NotAscii(cc) = *self;
// Always false, even though the wrapped char here is ASCII. Presumably this
// keeps `split` off any ASCII-only path -- confirm against the `CharEq` users.
2932 fn only_ascii(&self) -> bool { false }
2934 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2935 let len = s.split(' ').count();
2937 b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
// Benchmark: `split` with a named function as the separator predicate. The
// expected piece count comes from splitting on a plain space char.
2941 fn split_extern_fn(b: &mut Bencher) {
2942 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2943 let len = s.split(' ').count();
// Predicate passed to `split`: true exactly for the space character.
2944 fn pred(c: char) -> bool { c == ' ' }
2946 b.iter(|| assert_eq!(s.split(pred).count(), len));
// Benchmark: `split` with a closure as the separator predicate. The expected
// piece count comes from splitting on a plain space char.
2950 fn split_closure(b: &mut Bencher) {
2951 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2952 let len = s.split(' ').count();
// The closure is true exactly for the space character.
2954 b.iter(|| assert_eq!(s.split(|&: c: char| c == ' ').count(), len));
// Benchmark: `split` with a slice of chars as the separator set. The slice
// holds only the space character, so the piece count must equal the one
// obtained by splitting on a plain space char.
2958 fn split_slice(b: &mut Bencher) {
2959 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2960 let len = s.split(' ').count();
// Built outside the timed closure so only the split itself is measured.
2962 let c: &[char] = &[' '];
2963 b.iter(|| assert_eq!(s.split(c).count(), len));
// Benchmark: `connect` over a vector holding ten copies of the same string.
// The assertion checks the joined length: ten copies of `s` plus nine
// separators.
// NOTE(review): the binding of `sep` and the `b.iter(..)` call that presumably
// wraps the assertion are not visible in this listing -- confirm against the
// full file.
2967 fn bench_connect(b: &mut Bencher) {
2968 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2970 let v = vec![s, s, s, s, s, s, s, s, s, s];
2972 assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
// Benchmark: `contains` on a short, single-sentence haystack, asserting that
// the needle is found.
// NOTE(review): the binding of `needle` and the `b.iter(..)` call that
// presumably wraps the assertion are not visible in this listing -- confirm
// against the full file.
2977 fn bench_contains_short_short(b: &mut Bencher) {
2978 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2982 assert!(haystack.contains(needle));
// Benchmark: `contains` with a short needle over a long, multi-paragraph
// haystack. The needle `english` is asserted to be absent from the haystack.
// The haystack lines end in a backslash, which continues the string literal
// onto the next source line.
// NOTE(review): the line that opens the haystack literal and the `b.iter(..)`
// call that presumably wraps the assertion are not visible in this listing --
// confirm against the full file.
2987 fn bench_contains_short_long(b: &mut Bencher) {
2989 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
2990 ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
2991 eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
2992 sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
2993 tempus vel, gravida nec quam.
2995 In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
2996 sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
2997 diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
2998 lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
2999 eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
3000 interdum. Curabitur ut nisi justo.
3002 Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
3003 mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
3004 lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
3005 est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
3006 felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
3007 ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
3008 feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
3009 Aliquam sit amet placerat lorem.
3011 Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
3012 mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
3013 Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
3014 lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
3015 suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
3018 Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
3019 feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
3020 vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
3021 leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
3022 malesuada sollicitudin quam eu fermentum.";
3023 let needle = "english";
3026 assert!(!haystack.contains(needle));
// Benchmark: `contains` on an input built to be costly for a simple
// position-by-position search, as the name suggests. The haystack is a long
// run of the letter `a`; the needle is a shorter run of `a` ending in `b`, so
// it matches a long prefix at many positions but is never found.
// NOTE(review): the `b.iter(..)` call that presumably wraps the assertion is
// not visible in this listing -- confirm against the full file.
3031 fn bench_contains_bad_naive(b: &mut Bencher) {
3032 let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
3033 let needle = "aaaaaaaab";
3036 assert!(!haystack.contains(needle));
// Benchmark: `contains` where the needle is the same text as the haystack, so
// the only possible match spans the whole input. The match is asserted.
// NOTE(review): the `b.iter(..)` call that presumably wraps the assertion and
// the closing lines of this function are not visible in this listing --
// confirm against the full file.
3041 fn bench_contains_equal(b: &mut Bencher) {
3042 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3043 let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3046 assert!(haystack.contains(needle));