1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 // ignore-lexer-test FIXME #15679
13 //! Unicode string manipulation (`str` type)
17 //! Rust's string type is one of the core primitive types of the language. While
18 //! represented by the name `str`, the name `str` is not actually a valid type in
19 //! Rust. Each string must also be decorated with a pointer. `String` is used
20 //! for an owned string, so there is only one commonly-used `str` type in Rust:
23 //! `&str` is the borrowed string type. This type of string can only be created
24 //! from other strings, unless it is a static string (see below). As the word
25 //! "borrowed" implies, this type of string is owned elsewhere, and this string
26 //! cannot be moved out of.
28 //! As an example, here's some code that uses a string.
32 //! let borrowed_string = "This string is borrowed with the 'static lifetime";
36 //! From the example above, you can guess that Rust's string literals have the
37 //! `'static` lifetime. This is akin to C's concept of a static string.
38 //! More precisely, string literals are immutable views with a 'static lifetime
39 //! (otherwise known as the lifetime of the entire program), and thus have the
40 //! type `&'static str`.
44 //! Rust's string type, `str`, is a sequence of Unicode scalar values encoded as a
45 //! stream of UTF-8 bytes. All [strings](../../reference.html#literals) are
46 //! guaranteed to be validly encoded UTF-8 sequences. Additionally, strings are
47 //! not null-terminated and can thus contain null bytes.
//! The actual representation of strings has a direct mapping to slices: `&str`
//! is the same as `&[u8]`.
52 #![doc(primitive = "str")]
54 use self::RecompositionState::*;
55 use self::DecompositionType::*;
57 use core::borrow::{BorrowFrom, ToOwned};
58 use core::char::CharExt;
59 use core::clone::Clone;
60 use core::iter::AdditiveIterator;
61 use core::iter::{range, Iterator, IteratorExt};
62 use core::kinds::Sized;
64 use core::option::Option::{self, Some, None};
65 use core::slice::AsSlice;
66 use core::str as core_str;
67 use unicode::str::{UnicodeStr, Utf16Encoder};
69 use ring_buf::RingBuf;
74 use slice::SliceConcatExt;
76 pub use core::str::{FromStr, Utf8Error, Str};
77 pub use core::str::{Lines, LinesAny, MatchIndices, SplitStr, CharRange};
78 pub use core::str::{Split, SplitTerminator};
79 pub use core::str::{SplitN, RSplitN};
80 pub use core::str::{from_utf8, CharEq, Chars, CharIndices, Bytes};
81 pub use core::str::{from_utf8_unchecked, from_c_str};
82 pub use unicode::str::{Words, Graphemes, GraphemeIndices};
85 Section: Creating a string
88 impl<S: Str> SliceConcatExt<str, String> for [S] {
89 fn concat(&self) -> String {
90 let s = self.as_slice();
96 // `len` calculation may overflow but push_str will check boundaries
97 let len = s.iter().map(|s| s.as_slice().len()).sum();
98 let mut result = String::with_capacity(len);
101 result.push_str(s.as_slice())
107 fn connect(&self, sep: &str) -> String {
108 let s = self.as_slice();
111 return String::new();
119 // this is wrong without the guarantee that `self` is non-empty
120 // `len` calculation may overflow but push_str but will check boundaries
121 let len = sep.len() * (s.len() - 1)
122 + s.iter().map(|s| s.as_slice().len()).sum();
123 let mut result = String::with_capacity(len);
124 let mut first = true;
130 result.push_str(sep);
132 result.push_str(s.as_slice());
142 // Helper functions used for Unicode normalization
143 fn canonical_sort(comb: &mut [(char, u8)]) {
144 let len = comb.len();
145 for i in range(0, len) {
146 let mut swapped = false;
147 for j in range(1, len-i) {
148 let class_a = comb[j-1].1;
149 let class_b = comb[j].1;
150 if class_a != 0 && class_b != 0 && class_a > class_b {
155 if !swapped { break; }
// Which Unicode decomposition a `Decompositions` iterator performs.
// The two variants correspond to the `decompose_canonical` (NFD) and
// `decompose_compatible` (NFKD) calls made in `Iterator::next`.
enum DecompositionType {
    Canonical,
    Compatible
}
165 /// External iterator for a string's decomposition's characters.
166 /// Use with the `std::iter` module.
169 pub struct Decompositions<'a> {
170 kind: DecompositionType,
172 buffer: Vec<(char, u8)>,
impl<'a> Iterator for Decompositions<'a> {
    // NOTE(review): parts of this body are not visible in this excerpt;
    // the comments below describe only what the shown lines establish.
    fn next(&mut self) -> Option<char> {
        // First try to drain characters already buffered and sorted.
        match self.buffer.first() {
                self.buffer.remove(0);
            Some(&(c, _)) if self.sorted => {
                self.buffer.remove(0);
            _ => self.sorted = false
        // Pull from the source iterator, decomposing each character into
        // `buffer` together with its canonical combining class.
        for ch in self.iter {
            // Reborrow fields separately so the closure below can mutate
            // them while `self.iter` is also borrowed by the loop.
            let buffer = &mut self.buffer;
            let sorted = &mut self.sorted;
            let callback = |&mut: d| {
                unicode::char::canonical_combining_class(d);
                // A starter (class 0) ends a reorderable run: canonically
                // sort the pending combining marks before continuing.
                if class == 0 && !*sorted {
                    canonical_sort(buffer.as_mut_slice());
                buffer.push((d, class));
            // Dispatch on `kind`: NFD vs NFKD decomposition.
            unicode::char::decompose_canonical(ch, callback)
            unicode::char::decompose_compatible(ch, callback)
        // Source exhausted: sort whatever remains buffered, then yield it.
        canonical_sort(self.buffer.as_mut_slice());
        if self.buffer.is_empty() {
        match self.buffer.remove(0) {
    fn size_hint(&self) -> (uint, Option<uint>) {
        // Decomposition never shrinks the stream, so the source's lower
        // bound remains valid; no finite upper bound can be given.
        let (lower, _) = self.iter.size_hint();
// States of the recomposition state machine driven by
// `Recompositions::next` (the variants are referenced there).
enum RecompositionState {
    // Combining incoming decomposed characters onto the current starter.
    Composing,
    // Draining buffered characters that could not be composed.
    Purging,
    // The underlying decomposition iterator is exhausted.
    Finished
}
255 /// External iterator for a string's recomposition's characters.
256 /// Use with the `std::iter` module.
259 pub struct Recompositions<'a> {
260 iter: Decompositions<'a>,
261 state: RecompositionState,
262 buffer: RingBuf<char>,
263 composee: Option<char>,
impl<'a> Iterator for Recompositions<'a> {
    // NOTE(review): parts of this state machine are not visible in this
    // excerpt; comments describe only what the shown lines establish.
    fn next(&mut self) -> Option<char> {
        for ch in self.iter {
            // The canonical combining class decides whether `ch` may
            // compose with the pending starter (`composee`).
            let ch_class = unicode::char::canonical_combining_class(ch);
            if self.composee.is_none() {
                // No pending starter: remember `ch` as the candidate.
                self.composee = Some(ch);
            let k = self.composee.clone().unwrap();
            match self.last_ccc {
                // No combining mark pending: try the primary composite.
                match unicode::char::compose(k, ch) {
                    // Composition succeeded: fold `ch` into the starter.
                    self.composee = Some(r);
                    // `ch` becomes the new pending starter.
                    self.composee = Some(ch);
                self.buffer.push_back(ch);
                self.last_ccc = Some(ch_class);
                if l_class >= ch_class {
                    // `ch` is blocked from `composee`
                    self.composee = Some(ch);
                    self.last_ccc = None;
                    // Flush everything buffered before continuing.
                    self.state = Purging;
                self.buffer.push_back(ch);
                self.last_ccc = Some(ch_class);
                match unicode::char::compose(k, ch) {
                    self.composee = Some(r);
                    self.buffer.push_back(ch);
                    self.last_ccc = Some(ch_class);
        // Input exhausted: emit the pending starter, then drain `buffer`.
        self.state = Finished;
        if self.composee.is_some() {
            return self.composee.take();
        match self.buffer.pop_front() {
            // Buffer drained: resume composing on the next call.
            None => self.state = Composing,
        match self.buffer.pop_front() {
            None => return self.composee.take(),
352 /// External iterator for a string's UTF16 codeunits.
353 /// Use with the `std::iter` module.
356 pub struct Utf16Units<'a> {
357 encoder: Utf16Encoder<Chars<'a>>
361 impl<'a> Iterator for Utf16Units<'a> {
365 fn next(&mut self) -> Option<u16> { self.encoder.next() }
368 fn size_hint(&self) -> (uint, Option<uint>) { self.encoder.size_hint() }
// Return the initial codepoint accumulator for the first byte.
// The first byte is special, only want bottom 5 bits for width 2, 4 bits
// for width 3, and 3 bits for width 4
macro_rules! utf8_first_byte {
    ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
}
// return the value of $ch updated with continuation byte $byte
macro_rules! utf8_acc_cont_byte {
    ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
}
387 #[unstable = "trait is unstable"]
388 impl BorrowFrom<String> for str {
389 fn borrow_from(owned: &String) -> &str { owned[] }
392 #[unstable = "trait is unstable"]
393 impl ToOwned<String> for str {
394 fn to_owned(&self) -> String {
396 String::from_utf8_unchecked(self.as_bytes().to_owned())
406 Section: Trait implementations
409 /// Any string that can be represented as a slice.
410 pub trait StrExt for Sized?: ops::Slice<uint, str> {
411 /// Escapes each char in `s` with `char::escape_default`.
412 #[unstable = "return type may change to be an iterator"]
413 fn escape_default(&self) -> String {
414 self.chars().flat_map(|c| c.escape_default()).collect()
417 /// Escapes each char in `s` with `char::escape_unicode`.
418 #[unstable = "return type may change to be an iterator"]
419 fn escape_unicode(&self) -> String {
420 self.chars().flat_map(|c| c.escape_unicode()).collect()
423 /// Replaces all occurrences of one string with another.
427 /// * `from` - The string to replace
428 /// * `to` - The replacement string
432 /// The original string with all occurrences of `from` replaced with `to`.
437 /// let s = "Do you know the muffin man,
438 /// The muffin man, the muffin man, ...".to_string();
440 /// assert_eq!(s.replace("muffin man", "little lamb"),
441 /// "Do you know the little lamb,
442 /// The little lamb, the little lamb, ...".to_string());
444 /// // not found, so no change.
445 /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
448 fn replace(&self, from: &str, to: &str) -> String {
449 let mut result = String::new();
450 let mut last_end = 0;
451 for (start, end) in self.match_indices(from) {
452 result.push_str(unsafe { self.slice_unchecked(last_end, start) });
456 result.push_str(unsafe { self.slice_unchecked(last_end, self.len()) });
460 /// Returns an iterator over the string in Unicode Normalization Form D
461 /// (canonical decomposition).
463 #[unstable = "this functionality may be moved to libunicode"]
464 fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
466 iter: self[].chars(),
473 /// Returns an iterator over the string in Unicode Normalization Form KD
474 /// (compatibility decomposition).
476 #[unstable = "this functionality may be moved to libunicode"]
477 fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
479 iter: self[].chars(),
486 /// An Iterator over the string in Unicode Normalization Form C
487 /// (canonical decomposition followed by canonical composition).
489 #[unstable = "this functionality may be moved to libunicode"]
490 fn nfc_chars<'a>(&'a self) -> Recompositions<'a> {
492 iter: self.nfd_chars(),
494 buffer: RingBuf::new(),
500 /// An Iterator over the string in Unicode Normalization Form KC
501 /// (compatibility decomposition followed by canonical composition).
503 #[unstable = "this functionality may be moved to libunicode"]
504 fn nfkc_chars<'a>(&'a self) -> Recompositions<'a> {
506 iter: self.nfkd_chars(),
508 buffer: RingBuf::new(),
514 /// Returns true if a string contains a string pattern.
518 /// - pat - The string pattern to look for
523 /// assert!("bananas".contains("nana"));
526 fn contains(&self, pat: &str) -> bool {
527 core_str::StrExt::contains(self[], pat)
530 /// Returns true if a string contains a char pattern.
534 /// - pat - The char pattern to look for
539 /// assert!("hello".contains_char('e'));
541 #[unstable = "might get removed in favour of a more generic contains()"]
542 fn contains_char<P: CharEq>(&self, pat: P) -> bool {
543 core_str::StrExt::contains_char(self[], pat)
546 /// An iterator over the characters of `self`. Note, this iterates
547 /// over Unicode code-points, not Unicode graphemes.
552 /// let v: Vec<char> = "abc åäö".chars().collect();
553 /// assert_eq!(v, vec!['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
556 fn chars(&self) -> Chars {
557 core_str::StrExt::chars(self[])
560 /// An iterator over the bytes of `self`
565 /// let v: Vec<u8> = "bors".bytes().collect();
566 /// assert_eq!(v, b"bors".to_vec());
569 fn bytes(&self) -> Bytes {
570 core_str::StrExt::bytes(self[])
573 /// An iterator over the characters of `self` and their byte offsets.
575 fn char_indices(&self) -> CharIndices {
576 core_str::StrExt::char_indices(self[])
579 /// An iterator over substrings of `self`, separated by characters
580 /// matched by the pattern `pat`.
585 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect();
586 /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
588 /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).collect();
589 /// assert_eq!(v, vec!["abc", "def", "ghi"]);
591 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
592 /// assert_eq!(v, vec!["lion", "", "tiger", "leopard"]);
594 /// let v: Vec<&str> = "".split('X').collect();
595 /// assert_eq!(v, vec![""]);
598 fn split<P: CharEq>(&self, pat: P) -> Split<P> {
599 core_str::StrExt::split(self[], pat)
602 /// An iterator over substrings of `self`, separated by characters
603 /// matched by the pattern `pat`, restricted to splitting at most `count`
609 /// let v: Vec<&str> = "Mary had a little lambda".splitn(2, ' ').collect();
610 /// assert_eq!(v, vec!["Mary", "had", "a little lambda"]);
612 /// let v: Vec<&str> = "abc1def2ghi".splitn(1, |&: c: char| c.is_numeric()).collect();
613 /// assert_eq!(v, vec!["abc", "def2ghi"]);
615 /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(2, 'X').collect();
616 /// assert_eq!(v, vec!["lion", "", "tigerXleopard"]);
618 /// let v: Vec<&str> = "abcXdef".splitn(0, 'X').collect();
619 /// assert_eq!(v, vec!["abcXdef"]);
621 /// let v: Vec<&str> = "".splitn(1, 'X').collect();
622 /// assert_eq!(v, vec![""]);
625 fn splitn<P: CharEq>(&self, count: uint, pat: P) -> SplitN<P> {
626 core_str::StrExt::splitn(self[], count, pat)
629 /// An iterator over substrings of `self`, separated by characters
630 /// matched by the pattern `pat`.
632 /// Equivalent to `split`, except that the trailing substring
633 /// is skipped if empty (terminator semantics).
638 /// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
639 /// assert_eq!(v, vec!["A", "B"]);
641 /// let v: Vec<&str> = "A..B..".split_terminator('.').collect();
642 /// assert_eq!(v, vec!["A", "", "B", ""]);
644 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').rev().collect();
645 /// assert_eq!(v, vec!["lamb", "little", "a", "had", "Mary"]);
647 /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).rev().collect();
648 /// assert_eq!(v, vec!["ghi", "def", "abc"]);
650 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').rev().collect();
651 /// assert_eq!(v, vec!["leopard", "tiger", "", "lion"]);
653 #[unstable = "might get removed"]
654 fn split_terminator<P: CharEq>(&self, pat: P) -> SplitTerminator<P> {
655 core_str::StrExt::split_terminator(self[], pat)
658 /// An iterator over substrings of `self`, separated by characters
659 /// matched by the pattern `pat`, starting from the end of the string.
660 /// Restricted to splitting at most `count` times.
665 /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(2, ' ').collect();
666 /// assert_eq!(v, vec!["lamb", "little", "Mary had a"]);
668 /// let v: Vec<&str> = "abc1def2ghi".rsplitn(1, |&: c: char| c.is_numeric()).collect();
669 /// assert_eq!(v, vec!["ghi", "abc1def"]);
671 /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(2, 'X').collect();
672 /// assert_eq!(v, vec!["leopard", "tiger", "lionX"]);
675 fn rsplitn<P: CharEq>(&self, count: uint, pat: P) -> RSplitN<P> {
676 core_str::StrExt::rsplitn(self[], count, pat)
679 /// An iterator over the start and end indices of the disjoint
680 /// matches of the pattern `pat` within `self`.
682 /// That is, each returned value `(start, end)` satisfies
683 /// `self.slice(start, end) == sep`. For matches of `sep` within
684 /// `self` that overlap, only the indices corresponding to the
685 /// first match are returned.
690 /// let v: Vec<(uint, uint)> = "abcXXXabcYYYabc".match_indices("abc").collect();
691 /// assert_eq!(v, vec![(0,3), (6,9), (12,15)]);
693 /// let v: Vec<(uint, uint)> = "1abcabc2".match_indices("abc").collect();
694 /// assert_eq!(v, vec![(1,4), (4,7)]);
696 /// let v: Vec<(uint, uint)> = "ababa".match_indices("aba").collect();
697 /// assert_eq!(v, vec![(0, 3)]); // only the first `aba`
699 #[unstable = "might have its iterator type changed"]
700 fn match_indices<'a>(&'a self, pat: &'a str) -> MatchIndices<'a> {
701 core_str::StrExt::match_indices(self[], pat)
704 /// An iterator over the substrings of `self` separated by the pattern `sep`.
709 /// let v: Vec<&str> = "abcXXXabcYYYabc".split_str("abc").collect();
710 /// assert_eq!(v, vec!["", "XXX", "YYY", ""]);
712 /// let v: Vec<&str> = "1abcabc2".split_str("abc").collect();
713 /// assert_eq!(v, vec!["1", "", "2"]);
715 #[unstable = "might get removed in the future in favor of a more generic split()"]
716 fn split_str<'a>(&'a self, pat: &'a str) -> SplitStr<'a> {
717 core_str::StrExt::split_str(self[], pat)
720 /// An iterator over the lines of a string (subsequences separated
721 /// by `\n`). This does not include the empty string after a
727 /// let four_lines = "foo\nbar\n\nbaz\n";
728 /// let v: Vec<&str> = four_lines.lines().collect();
729 /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
732 fn lines(&self) -> Lines {
733 core_str::StrExt::lines(self[])
736 /// An iterator over the lines of a string, separated by either
737 /// `\n` or `\r\n`. As with `.lines()`, this does not include an
738 /// empty trailing line.
743 /// let four_lines = "foo\r\nbar\n\r\nbaz\n";
744 /// let v: Vec<&str> = four_lines.lines_any().collect();
745 /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
748 fn lines_any(&self) -> LinesAny {
749 core_str::StrExt::lines_any(self[])
752 /// Returns a slice of the given string from the byte range
753 /// [`begin`..`end`).
755 /// This operation is `O(1)`.
757 /// Panics when `begin` and `end` do not point to valid characters
758 /// or point beyond the last character of the string.
760 /// See also `slice_to` and `slice_from` for slicing prefixes and
761 /// suffixes of strings, and `slice_chars` for slicing based on
762 /// code point counts.
767 /// let s = "Löwe 老虎 Léopard";
768 /// assert_eq!(s.slice(0, 1), "L");
770 /// assert_eq!(s.slice(1, 9), "öwe 老");
772 /// // these will panic:
773 /// // byte 2 lies within `ö`:
774 /// // s.slice(2, 3);
776 /// // byte 8 lies within `老`
777 /// // s.slice(1, 8);
779 /// // byte 100 is outside the string
780 /// // s.slice(3, 100);
782 #[unstable = "use slice notation [a..b] instead"]
783 fn slice(&self, begin: uint, end: uint) -> &str {
784 core_str::StrExt::slice(self[], begin, end)
787 /// Returns a slice of the string from `begin` to its end.
789 /// Equivalent to `self.slice(begin, self.len())`.
791 /// Panics when `begin` does not point to a valid character, or is
794 /// See also `slice`, `slice_to` and `slice_chars`.
795 #[unstable = "use slice notation [a..] instead"]
796 fn slice_from(&self, begin: uint) -> &str {
797 core_str::StrExt::slice_from(self[], begin)
800 /// Returns a slice of the string from the beginning to byte
803 /// Equivalent to `self.slice(0, end)`.
805 /// Panics when `end` does not point to a valid character, or is
808 /// See also `slice`, `slice_from` and `slice_chars`.
809 #[unstable = "use slice notation [0..a] instead"]
810 fn slice_to(&self, end: uint) -> &str {
811 core_str::StrExt::slice_to(self[], end)
814 /// Returns a slice of the string from the character range
815 /// [`begin`..`end`).
817 /// That is, start at the `begin`-th code point of the string and
818 /// continue to the `end`-th code point. This does not detect or
819 /// handle edge cases such as leaving a combining character as the
820 /// first code point of the string.
822 /// Due to the design of UTF-8, this operation is `O(end)`.
823 /// See `slice`, `slice_to` and `slice_from` for `O(1)`
824 /// variants that use byte indices rather than code point
827 /// Panics if `begin` > `end` or the either `begin` or `end` are
828 /// beyond the last character of the string.
833 /// let s = "Löwe 老虎 Léopard";
834 /// assert_eq!(s.slice_chars(0, 4), "Löwe");
835 /// assert_eq!(s.slice_chars(5, 7), "老虎");
837 #[unstable = "may have yet to prove its worth"]
838 fn slice_chars(&self, begin: uint, end: uint) -> &str {
839 core_str::StrExt::slice_chars(self[], begin, end)
842 /// Takes a bytewise (not UTF-8) slice from a string.
844 /// Returns the substring from [`begin`..`end`).
846 /// Caller must check both UTF-8 character boundaries and the boundaries of
847 /// the entire slice as well.
849 unsafe fn slice_unchecked(&self, begin: uint, end: uint) -> &str {
850 core_str::StrExt::slice_unchecked(self[], begin, end)
853 /// Returns true if the pattern `pat` is a prefix of the string.
858 /// assert!("banana".starts_with("ba"));
861 fn starts_with(&self, pat: &str) -> bool {
862 core_str::StrExt::starts_with(self[], pat)
865 /// Returns true if the pattern `pat` is a suffix of the string.
870 /// assert!("banana".ends_with("nana"));
873 fn ends_with(&self, pat: &str) -> bool {
874 core_str::StrExt::ends_with(self[], pat)
877 /// Returns a string with all pre- and suffixes that match
878 /// the pattern `pat` repeatedly removed.
882 /// * pat - a string pattern
887 /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
888 /// let x: &[_] = &['1', '2'];
889 /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar");
890 /// assert_eq!("123foo1bar123".trim_matches(|&: c: char| c.is_numeric()), "foo1bar");
893 fn trim_matches<P: CharEq>(&self, pat: P) -> &str {
894 core_str::StrExt::trim_matches(self[], pat)
897 /// Returns a string with all prefixes that match
898 /// the pattern `pat` repeatedly removed.
902 /// * pat - a string pattern
907 /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
908 /// let x: &[_] = &['1', '2'];
909 /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12");
910 /// assert_eq!("123foo1bar123".trim_left_matches(|&: c: char| c.is_numeric()), "foo1bar123");
913 fn trim_left_matches<P: CharEq>(&self, pat: P) -> &str {
914 core_str::StrExt::trim_left_matches(self[], pat)
917 /// Returns a string with all suffixes that match
918 /// the pattern `pat` repeatedly removed.
922 /// * pat - a string pattern
927 /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
928 /// let x: &[_] = &['1', '2'];
929 /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar");
930 /// assert_eq!("123foo1bar123".trim_right_matches(|&: c: char| c.is_numeric()), "123foo1bar");
933 fn trim_right_matches<P: CharEq>(&self, pat: P) -> &str {
934 core_str::StrExt::trim_right_matches(self[], pat)
937 /// Check that `index`-th byte lies at the start and/or end of a
938 /// UTF-8 code point sequence.
940 /// The start and end of the string (when `index == self.len()`)
941 /// are considered to be boundaries.
943 /// Panics if `index` is greater than `self.len()`.
948 /// let s = "Löwe 老虎 Léopard";
949 /// assert!(s.is_char_boundary(0));
951 /// assert!(s.is_char_boundary(6));
952 /// assert!(s.is_char_boundary(s.len()));
954 /// // second byte of `ö`
955 /// assert!(!s.is_char_boundary(2));
957 /// // third byte of `老`
958 /// assert!(!s.is_char_boundary(8));
960 #[unstable = "naming is uncertain with container conventions"]
961 fn is_char_boundary(&self, index: uint) -> bool {
962 core_str::StrExt::is_char_boundary(self[], index)
965 /// Pluck a character out of a string and return the index of the next
968 /// This function can be used to iterate over the Unicode characters of a
973 /// This example manually iterates through the characters of a
974 /// string; this should normally be done by `.chars()` or
978 /// use std::str::CharRange;
980 /// let s = "中华Việt Nam";
982 /// while i < s.len() {
983 /// let CharRange {ch, next} = s.char_range_at(i);
984 /// println!("{}: {}", i, ch);
1006 /// * s - The string
1007 /// * i - The byte offset of the char to extract
1011 /// A record {ch: char, next: uint} containing the char value and the byte
1012 /// index of the next Unicode character.
1016 /// If `i` is greater than or equal to the length of the string.
1017 /// If `i` is not the index of the beginning of a valid UTF-8 character.
1018 #[unstable = "naming is uncertain with container conventions"]
1019 fn char_range_at(&self, start: uint) -> CharRange {
1020 core_str::StrExt::char_range_at(self[], start)
1023 /// Given a byte position and a str, return the previous char and its position.
1025 /// This function can be used to iterate over a Unicode string in reverse.
1027 /// Returns 0 for next index if called on start index 0.
1031 /// If `i` is greater than the length of the string.
1032 /// If `i` is not an index following a valid UTF-8 character.
1033 #[unstable = "naming is uncertain with container conventions"]
1034 fn char_range_at_reverse(&self, start: uint) -> CharRange {
1035 core_str::StrExt::char_range_at_reverse(self[], start)
1038 /// Plucks the character starting at the `i`th byte of a string.
1044 /// assert_eq!(s.char_at(1), 'b');
1045 /// assert_eq!(s.char_at(2), 'π');
1046 /// assert_eq!(s.char_at(4), 'c');
1051 /// If `i` is greater than or equal to the length of the string.
1052 /// If `i` is not the index of the beginning of a valid UTF-8 character.
1053 #[unstable = "naming is uncertain with container conventions"]
1054 fn char_at(&self, i: uint) -> char {
1055 core_str::StrExt::char_at(self[], i)
1058 /// Plucks the character ending at the `i`th byte of a string.
1062 /// If `i` is greater than the length of the string.
1063 /// If `i` is not an index following a valid UTF-8 character.
1064 #[unstable = "naming is uncertain with container conventions"]
1065 fn char_at_reverse(&self, i: uint) -> char {
1066 core_str::StrExt::char_at_reverse(self[], i)
1069 /// Work with the byte buffer of a string as a byte slice.
1074 /// assert_eq!("bors".as_bytes(), b"bors");
1077 fn as_bytes(&self) -> &[u8] {
1078 core_str::StrExt::as_bytes(self[])
1081 /// Returns the byte index of the first character of `self` that
1082 /// matches the pattern `pat`.
1086 /// `Some` containing the byte index of the last matching character
1087 /// or `None` if there is no match
1092 /// let s = "Löwe 老虎 Léopard";
1094 /// assert_eq!(s.find('L'), Some(0));
1095 /// assert_eq!(s.find('é'), Some(14));
1097 /// // the first space
1098 /// assert_eq!(s.find(|&: c: char| c.is_whitespace()), Some(5));
1100 /// // neither are found
1101 /// let x: &[_] = &['1', '2'];
1102 /// assert_eq!(s.find(x), None);
1105 fn find<P: CharEq>(&self, pat: P) -> Option<uint> {
1106 core_str::StrExt::find(self[], pat)
1109 /// Returns the byte index of the last character of `self` that
1110 /// matches the pattern `pat`.
1114 /// `Some` containing the byte index of the last matching character
1115 /// or `None` if there is no match.
1120 /// let s = "Löwe 老虎 Léopard";
1122 /// assert_eq!(s.rfind('L'), Some(13));
1123 /// assert_eq!(s.rfind('é'), Some(14));
1125 /// // the second space
1126 /// assert_eq!(s.rfind(|&: c: char| c.is_whitespace()), Some(12));
1128 /// // searches for an occurrence of either `1` or `2`, but neither are found
1129 /// let x: &[_] = &['1', '2'];
1130 /// assert_eq!(s.rfind(x), None);
1133 fn rfind<P: CharEq>(&self, pat: P) -> Option<uint> {
1134 core_str::StrExt::rfind(self[], pat)
1137 /// Returns the byte index of the first matching substring
1141 /// * `needle` - The string to search for
1145 /// `Some` containing the byte index of the first matching substring
1146 /// or `None` if there is no match.
1151 /// let s = "Löwe 老虎 Léopard";
1153 /// assert_eq!(s.find_str("老虎 L"), Some(6));
1154 /// assert_eq!(s.find_str("muffin man"), None);
1156 #[unstable = "might get removed in favor of a more generic find in the future"]
1157 fn find_str(&self, needle: &str) -> Option<uint> {
1158 core_str::StrExt::find_str(self[], needle)
1161 /// Retrieves the first character from a string slice and returns
1162 /// it. This does not allocate a new string; instead, it returns a
1163 /// slice that point one character beyond the character that was
1164 /// shifted. If the string does not contain any characters,
1165 /// None is returned instead.
1170 /// let s = "Löwe 老虎 Léopard";
1171 /// let (c, s1) = s.slice_shift_char().unwrap();
1172 /// assert_eq!(c, 'L');
1173 /// assert_eq!(s1, "öwe 老虎 Léopard");
1175 /// let (c, s2) = s1.slice_shift_char().unwrap();
1176 /// assert_eq!(c, 'ö');
1177 /// assert_eq!(s2, "we 老虎 Léopard");
1179 #[unstable = "awaiting conventions about shifting and slices"]
1180 fn slice_shift_char(&self) -> Option<(char, &str)> {
1181 core_str::StrExt::slice_shift_char(self[])
1184 /// Returns the byte offset of an inner slice relative to an enclosing outer slice.
1186 /// Panics if `inner` is not a direct slice contained within self.
1191 /// let string = "a\nb\nc";
1192 /// let lines: Vec<&str> = string.lines().collect();
1194 /// assert!(string.subslice_offset(lines[0]) == 0); // &"a"
1195 /// assert!(string.subslice_offset(lines[1]) == 2); // &"b"
1196 /// assert!(string.subslice_offset(lines[2]) == 4); // &"c"
1198 #[unstable = "awaiting convention about comparability of arbitrary slices"]
1199 fn subslice_offset(&self, inner: &str) -> uint {
1200 core_str::StrExt::subslice_offset(self[], inner)
1203 /// Return an unsafe pointer to the strings buffer.
1205 /// The caller must ensure that the string outlives this pointer,
1206 /// and that it is not reallocated (e.g. by pushing to the
1210 fn as_ptr(&self) -> *const u8 {
1211 core_str::StrExt::as_ptr(self[])
1214 /// Return an iterator of `u16` over the string encoded as UTF-16.
1215 #[unstable = "this functionality may only be provided by libunicode"]
1216 fn utf16_units(&self) -> Utf16Units {
1217 Utf16Units { encoder: Utf16Encoder::new(self[].chars()) }
1220 /// Return the number of bytes in this string
1225 /// assert_eq!("foo".len(), 3);
1226 /// assert_eq!("ƒoo".len(), 4);
1230 fn len(&self) -> uint {
1231 core_str::StrExt::len(self[])
1234 /// Returns true if this slice contains no bytes
1239 /// assert!("".is_empty());
1243 fn is_empty(&self) -> bool {
1244 core_str::StrExt::is_empty(self[])
1247 /// Parse this string into the specified type.
1252 /// assert_eq!("4".parse::<u32>(), Some(4));
1253 /// assert_eq!("j".parse::<u32>(), None);
1256 #[unstable = "this method was just created"]
1257 fn parse<F: FromStr>(&self) -> Option<F> {
1258 core_str::StrExt::parse(self[])
1261 /// Returns an iterator over the
1262 /// [grapheme clusters](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
1265 /// If `is_extended` is true, the iterator is over the *extended grapheme clusters*;
1266 /// otherwise, the iterator is over the *legacy grapheme clusters*.
1267 /// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
1268 /// recommends extended grapheme cluster boundaries for general processing.
1273 /// let gr1 = "a\u{310}e\u{301}o\u{308}\u{332}".graphemes(true).collect::<Vec<&str>>();
1274 /// let b: &[_] = &["a\u{310}", "e\u{301}", "o\u{308}\u{332}"];
1275 /// assert_eq!(gr1.as_slice(), b);
1276 /// let gr2 = "a\r\nb🇷🇺🇸🇹".graphemes(true).collect::<Vec<&str>>();
1277 /// let b: &[_] = &["a", "\r\n", "b", "🇷🇺🇸🇹"];
1278 /// assert_eq!(gr2.as_slice(), b);
1280 #[unstable = "this functionality may only be provided by libunicode"]
1281 fn graphemes(&self, is_extended: bool) -> Graphemes {
1282 UnicodeStr::graphemes(self[], is_extended)
1285 /// Returns an iterator over the grapheme clusters of self and their byte offsets.
1286 /// See `graphemes()` method for more information.
1291 /// let gr_inds = "a̐éö̲\r\n".grapheme_indices(true).collect::<Vec<(uint, &str)>>();
1292 /// let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
1293 /// assert_eq!(gr_inds.as_slice(), b);
1295 #[unstable = "this functionality may only be provided by libunicode"]
1296 fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices {
1297 UnicodeStr::grapheme_indices(self[], is_extended)
1300 /// An iterator over the words of a string (subsequences separated
1301 /// by any sequence of whitespace). Sequences of whitespace are
1302 /// collapsed, so empty "words" are not included.
1307 /// let some_words = " Mary had\ta little \n\t lamb";
1308 /// let v: Vec<&str> = some_words.words().collect();
1309 /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
1312 fn words(&self) -> Words {
1313 UnicodeStr::words(self[])
1316 /// Returns a string's displayed width in columns, treating control
1317 /// characters as zero-width.
1319 /// `is_cjk` determines behavior for characters in the Ambiguous category:
1320 /// if `is_cjk` is `true`, these are 2 columns wide; otherwise, they are 1.
1321 /// In CJK locales, `is_cjk` should be `true`, else it should be `false`.
1322 /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
1323 /// recommends that these characters be treated as 1 column (i.e.,
1324 /// `is_cjk` = `false`) if the locale is unknown.
1325 #[unstable = "this functionality may only be provided by libunicode"]
1326 fn width(&self, is_cjk: bool) -> uint {
1327 UnicodeStr::width(self[], is_cjk)
1330 /// Returns a string with leading and trailing whitespace removed.
1332 fn trim(&self) -> &str {
1333 UnicodeStr::trim(self[])
1336 /// Returns a string with leading whitespace removed.
1338 fn trim_left(&self) -> &str {
1339 UnicodeStr::trim_left(self[])
1342 /// Returns a string with trailing whitespace removed.
1344 fn trim_right(&self) -> &str {
1345 UnicodeStr::trim_right(self[])
1349 impl StrExt for str {}
1355 use core::iter::AdditiveIterator;
1356 use super::from_utf8;
1357 use super::Utf8Error;
#[test]
fn test_le() {
    assert!("" <= "foo");
    assert!("foo" <= "foo");
    assert!("foo" != "bar");
}

#[test]
fn test_len() {
    // len() counts UTF-8 bytes, not chars.
    assert_eq!("".len(), 0u);
    assert_eq!("hello world".len(), 11u);
    assert_eq!("\x63".len(), 1u);
    assert_eq!("\u{a2}".len(), 2u);
    assert_eq!("\u{3c0}".len(), 2u);
    assert_eq!("\u{2620}".len(), 3u);
    assert_eq!("\u{1d11e}".len(), 4u);

    // chars().count() counts scalar values.
    assert_eq!("".chars().count(), 0u);
    assert_eq!("hello world".chars().count(), 11u);
    assert_eq!("\x63".chars().count(), 1u);
    assert_eq!("\u{a2}".chars().count(), 1u);
    assert_eq!("\u{3c0}".chars().count(), 1u);
    assert_eq!("\u{2620}".chars().count(), 1u);
    assert_eq!("\u{1d11e}".chars().count(), 1u);
    assert_eq!("ประเทศไทย中华Việt Nam".chars().count(), 19u);

    // FIX: these must be the FULLWIDTH letters U+FF48..U+FF4F ("ｈｅｌｌｏ"),
    // each 2 columns wide, for the expected width of 10 to hold; ASCII
    // "hello" is only 5 columns and the assertion would be false.
    assert_eq!("ｈｅｌｌｏ".width(false), 10u);
    assert_eq!("ｈｅｌｌｏ".width(true), 10u);
    assert_eq!("\0\0\0\0\0".width(false), 0u);
    assert_eq!("\0\0\0\0\0".width(true), 0u);
    assert_eq!("".width(false), 0u);
    assert_eq!("".width(true), 0u);
    // U+2081..U+2084 (subscript digits) are Ambiguous: 1 column in non-CJK
    // mode, 2 columns in CJK mode.
    assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(false), 4u);
    assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(true), 8u);
}
#[test]
fn test_find() {
    // find() returns the byte index of the first match.
    assert_eq!("hello".find('l'), Some(2u));
    assert_eq!("hello".find(|&: c: char| c == 'o'), Some(4u));
    assert!("hello".find('x').is_none());
    assert!("hello".find(|&: c: char| c == 'x').is_none());
    assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
    assert_eq!("ประเทศไทย中华Việt Nam".find(|&: c: char| c == '华'), Some(30u));
}

#[test]
fn test_rfind() {
    // rfind() returns the byte index of the last match.
    assert_eq!("hello".rfind('l'), Some(3u));
    assert_eq!("hello".rfind(|&: c: char| c == 'o'), Some(4u));
    assert!("hello".rfind('x').is_none());
    assert!("hello".rfind(|&: c: char| c == 'x').is_none());
    assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
    assert_eq!("ประเทศไทย中华Việt Nam".rfind(|&: c: char| c == '华'), Some(30u));
}
#[test]
fn test_collect() {
    // Collecting a string's chars reproduces the original string.
    let empty = String::from_str("");
    let s: String = empty.chars().collect();
    assert_eq!(empty, s);
    let data = String::from_str("ประเทศไทย中");
    let s: String = data.chars().collect();
    assert_eq!(data, s);
}

#[test]
fn test_into_bytes() {
    let data = String::from_str("asdf");
    let buf = data.into_bytes();
    assert_eq!(b"asdf", buf);
}
#[test]
fn test_find_str() {
    // byte positions
    assert_eq!("".find_str(""), Some(0u));
    assert!("banana".find_str("apple pie").is_none());

    let data = "abcabc";
    assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
    assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
    assert!(data.slice(2u, 4u).find_str("ab").is_none());

    let string = "ประเทศไทย中华Việt Nam";
    let mut data = String::from_str(string);
    data.push_str(string);
    assert!(data.find_str("ไท华").is_none());
    assert_eq!(data.slice(0u, 43u).find_str(""), Some(0u));
    assert_eq!(data.slice(6u, 43u).find_str(""), Some(6u - 6u));

    // Offsets are relative to the start of the slice searched.
    assert_eq!(data.slice(0u, 43u).find_str("ประ"), Some( 0u));
    assert_eq!(data.slice(0u, 43u).find_str("ทศไ"), Some(12u));
    assert_eq!(data.slice(0u, 43u).find_str("ย中"), Some(24u));
    assert_eq!(data.slice(0u, 43u).find_str("iệt"), Some(34u));
    assert_eq!(data.slice(0u, 43u).find_str("Nam"), Some(40u));

    assert_eq!(data.slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
    assert_eq!(data.slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
    assert_eq!(data.slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
    assert_eq!(data.slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
    assert_eq!(data.slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
}

#[test]
fn test_slice_chars() {
    // slice_chars indexes by char count rather than byte offset.
    fn t(a: &str, b: &str, start: uint) {
        assert_eq!(a.slice_chars(start, start + b.chars().count()), b);
    }
    t("hello", "llo", 2);
    t("hello", "el", 1);
    assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
}
/// Test helper: shorthand to build an owned `String` from a literal.
fn s(x: &str) -> String { x.to_string() }

macro_rules! test_concat {
    ($expected: expr, $string: expr) => {
        {
            let s: String = $string.concat();
            assert_eq!($expected, s);
        }
    }
}

#[test]
fn test_concat_for_different_types() {
    test_concat!("ab", vec![s("a"), s("b")]);
    test_concat!("ab", vec!["a", "b"]);
    test_concat!("ab", vec!["a", "b"].as_slice());
    test_concat!("ab", vec![s("a"), s("b")]);
}

#[test]
fn test_concat_for_different_lengths() {
    let empty: &[&str] = &[];
    test_concat!("", empty);
    test_concat!("a", ["a"]);
    test_concat!("ab", ["a", "b"]);
    test_concat!("abc", ["", "a", "bc"]);
}

macro_rules! test_connect {
    ($expected: expr, $string: expr, $delim: expr) => {
        {
            let s = $string.connect($delim);
            assert_eq!($expected, s);
        }
    }
}

#[test]
fn test_connect_for_different_types() {
    test_connect!("a-b", ["a", "b"], "-");
    let hyphen = "-".to_string();
    test_connect!("a-b", [s("a"), s("b")], hyphen.as_slice());
    test_connect!("a-b", vec!["a", "b"], hyphen.as_slice());
    test_connect!("a-b", vec!["a", "b"].as_slice(), "-");
    test_connect!("a-b", vec![s("a"), s("b")], "-");
}

#[test]
fn test_connect_for_different_lengths() {
    let empty: &[&str] = &[];
    test_connect!("", empty, "-");
    test_connect!("a", ["a"], "-");
    test_connect!("a-b", ["a", "b"], "-");
    // A leading empty element still contributes a delimiter.
    test_connect!("-a-bc", ["", "a", "bc"], "-");
}
#[test]
fn test_unsafe_slice() {
    assert_eq!("ab", unsafe {"abc".slice_unchecked(0, 2)});
    assert_eq!("bc", unsafe {"abc".slice_unchecked(1, 3)});
    assert_eq!("", unsafe {"abc".slice_unchecked(1, 1)});
    // Build 1,000,000 'a's in 10-byte chunks.
    fn a_million_letter_a() -> String {
        let mut i = 0u;
        let mut rs = String::new();
        while i < 100000 {
            rs.push_str("aaaaaaaaaa");
            i += 1;
        }
        rs
    }
    // Build 500,000 'a's in 5-byte chunks.
    fn half_a_million_letter_a() -> String {
        let mut i = 0u;
        let mut rs = String::new();
        while i < 100000 {
            rs.push_str("aaaaa");
            i += 1;
        }
        rs
    }
    let letters = a_million_letter_a();
    assert!(half_a_million_letter_a() ==
        unsafe {String::from_str(letters.slice_unchecked(0u, 500000))});
}
#[test]
fn test_starts_with() {
    assert!("".starts_with(""));
    assert!("abc".starts_with(""));
    assert!("abc".starts_with("a"));
    assert!(!"a".starts_with("abc"));
    assert!(!"".starts_with("abc"));
    assert!(!"ödd".starts_with("-"));
    assert!("ödd".starts_with("öd"));
}

#[test]
fn test_ends_with() {
    assert!("".ends_with(""));
    assert!("abc".ends_with(""));
    assert!("abc".ends_with("c"));
    assert!(!"a".ends_with("abc"));
    assert!(!"".ends_with("abc"));
    assert!(!"ddö".ends_with("-"));
    assert!("ddö".ends_with("dö"));
}

#[test]
fn test_is_empty() {
    assert!("".is_empty());
    assert!(!"a".is_empty());
}
#[test]
fn test_replace() {
    let a = "a";
    assert_eq!("".replace(a, "b"), String::from_str(""));
    assert_eq!("a".replace(a, "b"), String::from_str("b"));
    assert_eq!("ab".replace(a, "b"), String::from_str("bb"));
    let test = "test";
    assert!(" test test ".replace(test, "toast") ==
            String::from_str(" toast toast "));
    // FIX: removing both occurrences of "test" from " test test " leaves the
    // leading, separating, and trailing spaces — three spaces in total.
    assert_eq!(" test test ".replace(test, ""), String::from_str("   "));
}

#[test]
fn test_replace_2a() {
    let data = "ประเทศไทย中华";
    let repl = "دولة الكويت";

    // Replace a prefix.
    let a = "ประเ";
    let a2 = "دولة الكويتทศไทย中华";
    assert_eq!(data.replace(a, repl), a2);
}

#[test]
fn test_replace_2b() {
    let data = "ประเทศไทย中华";
    let repl = "دولة الكويت";

    // Replace an interior substring.
    let b = "ะเ";
    let b2 = "ปรدولة الكويتทศไทย中华";
    assert_eq!(data.replace(b, repl), b2);
}

#[test]
fn test_replace_2c() {
    let data = "ประเทศไทย中华";
    let repl = "دولة الكويت";

    // Replace a suffix.
    let c = "中华";
    let c2 = "ประเทศไทยدولة الكويت";
    assert_eq!(data.replace(c, repl), c2);
}

#[test]
fn test_replace_2d() {
    let data = "ประเทศไทย中华";
    let repl = "دولة الكويت";

    // A needle that never occurs leaves the string unchanged.
    let d = "ไท华";
    assert_eq!(data.replace(d, repl), data);
}
#[test]
fn test_slice() {
    assert_eq!("ab", "abc".slice(0, 2));
    assert_eq!("bc", "abc".slice(1, 3));
    assert_eq!("", "abc".slice(1, 1));
    assert_eq!("\u{65e5}", "\u{65e5}\u{672c}".slice(0, 3));

    let data = "ประเทศไทย中华";
    assert_eq!("ป", data.slice(0, 3));
    assert_eq!("ร", data.slice(3, 6));
    assert_eq!("", data.slice(3, 3));
    assert_eq!("华", data.slice(30, 33));

    // Build 1,000,000 '华' (3 bytes each) in 10-char chunks.
    fn a_million_letter_x() -> String {
        let mut i = 0u;
        let mut rs = String::new();
        while i < 100000 {
            rs.push_str("华华华华华华华华华华");
            i += 1;
        }
        rs
    }
    // Build 500,000 '华' in 5-char chunks.
    fn half_a_million_letter_x() -> String {
        let mut i = 0u;
        let mut rs = String::new();
        while i < 100000 {
            rs.push_str("华华华华华");
            i += 1;
        }
        rs
    }
    let letters = a_million_letter_x();
    assert!(half_a_million_letter_x() ==
        String::from_str(letters.slice(0u, 3u * 500000u)));
}

#[test]
fn test_slice_2() {
    let ss = "中华Việt Nam";

    assert_eq!("华", ss.slice(3u, 6u));
    assert_eq!("Việt Nam", ss.slice(6u, 16u));

    assert_eq!("ab", "abc".slice(0u, 2u));
    assert_eq!("bc", "abc".slice(1u, 3u));
    assert_eq!("", "abc".slice(1u, 1u));

    assert_eq!("中", ss.slice(0u, 3u));
    assert_eq!("华V", ss.slice(3u, 7u));
    assert_eq!("", ss.slice(3u, 3u));
}
#[test]
#[should_fail]
fn test_slice_fail() {
    // Byte 2 falls inside the multi-byte encoding of '中' — not a char
    // boundary, so slicing must panic.
    "中华Việt Nam".slice(0u, 2u);
}

#[test]
fn test_slice_from() {
    assert_eq!("abcd".slice_from(0), "abcd");
    assert_eq!("abcd".slice_from(2), "cd");
    assert_eq!("abcd".slice_from(4), "");
}

#[test]
fn test_slice_to() {
    assert_eq!("abcd".slice_to(0), "");
    assert_eq!("abcd".slice_to(2), "ab");
    assert_eq!("abcd".slice_to(4), "abcd");
}
#[test]
fn test_trim_left_matches() {
    // An empty char set trims nothing.
    let v: &[char] = &[];
    assert_eq!(" *** foo *** ".trim_left_matches(v), " *** foo *** ");
    let chars: &[char] = &['*', ' '];
    assert_eq!(" *** foo *** ".trim_left_matches(chars), "foo *** ");
    assert_eq!(" ***  *** ".trim_left_matches(chars), "");
    assert_eq!("foo *** ".trim_left_matches(chars), "foo *** ");

    assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
    let chars: &[char] = &['1', '2'];
    assert_eq!("12foo1bar12".trim_left_matches(chars), "foo1bar12");
    assert_eq!("123foo1bar123".trim_left_matches(|&: c: char| c.is_numeric()), "foo1bar123");
}

#[test]
fn test_trim_right_matches() {
    let v: &[char] = &[];
    assert_eq!(" *** foo *** ".trim_right_matches(v), " *** foo *** ");
    let chars: &[char] = &['*', ' '];
    assert_eq!(" *** foo *** ".trim_right_matches(chars), " *** foo");
    assert_eq!(" ***  *** ".trim_right_matches(chars), "");
    assert_eq!(" *** foo".trim_right_matches(chars), " *** foo");

    assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
    let chars: &[char] = &['1', '2'];
    assert_eq!("12foo1bar12".trim_right_matches(chars), "12foo1bar");
    assert_eq!("123foo1bar123".trim_right_matches(|&: c: char| c.is_numeric()), "123foo1bar");
}

#[test]
fn test_trim_matches() {
    let v: &[char] = &[];
    assert_eq!(" *** foo *** ".trim_matches(v), " *** foo *** ");
    let chars: &[char] = &['*', ' '];
    assert_eq!(" *** foo *** ".trim_matches(chars), "foo");
    assert_eq!(" ***  *** ".trim_matches(chars), "");
    assert_eq!("foo".trim_matches(chars), "foo");

    assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
    let chars: &[char] = &['1', '2'];
    assert_eq!("12foo1bar12".trim_matches(chars), "foo1bar");
    assert_eq!("123foo1bar123".trim_matches(|&: c: char| c.is_numeric()), "foo1bar");
}
#[test]
fn test_trim_left() {
    assert_eq!("".trim_left(), "");
    assert_eq!("a".trim_left(), "a");
    assert_eq!("    ".trim_left(), "");
    assert_eq!("     blah".trim_left(), "blah");
    // U+3000 (ideographic space) counts as whitespace.
    assert_eq!("   \u{3000}  wut".trim_left(), "wut");
    assert_eq!("hey ".trim_left(), "hey ");
}

#[test]
fn test_trim_right() {
    assert_eq!("".trim_right(), "");
    assert_eq!("a".trim_right(), "a");
    assert_eq!("    ".trim_right(), "");
    assert_eq!("blah     ".trim_right(), "blah");
    assert_eq!("wut   \u{3000}  ".trim_right(), "wut");
    assert_eq!(" hey".trim_right(), " hey");
}

#[test]
fn test_trim() {
    assert_eq!("".trim(), "");
    assert_eq!("a".trim(), "a");
    assert_eq!("    ".trim(), "");
    assert_eq!("    blah     ".trim(), "blah");
    assert_eq!("\nwut   \u{3000}  ".trim(), "wut");
    assert_eq!(" hey dude ".trim(), "hey dude");
}
#[test]
fn test_is_whitespace() {
    assert!("".chars().all(|c| c.is_whitespace()));
    assert!(" ".chars().all(|c| c.is_whitespace()));
    assert!("\u{2009}".chars().all(|c| c.is_whitespace())); // Thin space
    assert!("  \n\t   ".chars().all(|c| c.is_whitespace()));
    assert!(!"   _   ".chars().all(|c| c.is_whitespace()));
}

#[test]
fn test_slice_shift_char() {
    let data = "ประเทศไทย中";
    assert_eq!(data.slice_shift_char(), Some(('ป', "ระเทศไทย中")));
}

#[test]
fn test_slice_shift_char_2() {
    let empty = "";
    assert_eq!(empty.slice_shift_char(), None);
}
#[test]
fn test_is_utf8() {
    // deny overlong encodings
    assert!(from_utf8(&[0xc0, 0x80]).is_err());
    assert!(from_utf8(&[0xc0, 0xae]).is_err());
    assert!(from_utf8(&[0xe0, 0x80, 0x80]).is_err());
    assert!(from_utf8(&[0xe0, 0x80, 0xaf]).is_err());
    assert!(from_utf8(&[0xe0, 0x81, 0x81]).is_err());
    assert!(from_utf8(&[0xf0, 0x82, 0x82, 0xac]).is_err());
    // beyond U+10FFFF
    assert!(from_utf8(&[0xf4, 0x90, 0x80, 0x80]).is_err());

    // deny surrogates
    assert!(from_utf8(&[0xED, 0xA0, 0x80]).is_err());
    assert!(from_utf8(&[0xED, 0xBF, 0xBF]).is_err());

    // boundary-valid sequences
    assert!(from_utf8(&[0xC2, 0x80]).is_ok());
    assert!(from_utf8(&[0xDF, 0xBF]).is_ok());
    assert!(from_utf8(&[0xE0, 0xA0, 0x80]).is_ok());
    assert!(from_utf8(&[0xED, 0x9F, 0xBF]).is_ok());
    assert!(from_utf8(&[0xEE, 0x80, 0x80]).is_ok());
    assert!(from_utf8(&[0xEF, 0xBF, 0xBF]).is_ok());
    assert!(from_utf8(&[0xF0, 0x90, 0x80, 0x80]).is_ok());
    assert!(from_utf8(&[0xF4, 0x8F, 0xBF, 0xBF]).is_ok());
}
#[test]
fn test_is_utf16() {
    use unicode::str::is_utf16;
    macro_rules! pos ( ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } });

    // surrogate pairs (randomly generated with Python 3's
    // .encode('utf-16be'))
    pos!(&[0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
         &[0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
         &[0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);

    // mixtures (also random)
    pos!(&[0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
         &[0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
         &[0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);

    // negative tests
    macro_rules! neg ( ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } });

    // random byte sequences that Python 3's .decode('utf-16be')
    // rejected
    neg!(&[0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
         &[0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
         &[0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
         &[0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
         &[0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
         &[0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
         &[0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
         &[0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
         &[0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
         &[0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
         &[0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
         &[0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
         &[0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
         &[0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
         &[0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
         &[0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
         &[0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
         &[0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
         &[0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
         &[0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
         &[0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
}
#[test]
fn test_as_bytes() {
    // UTF-8 bytes of "ศไทย中华Việt Nam" (no null terminator).
    let v = [
        224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
        184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
        109
    ];
    let b: &[u8] = &[];
    assert_eq!("".as_bytes(), b);
    assert_eq!("abc".as_bytes(), b"abc");
    assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
}

#[test]
#[should_fail]
fn test_as_bytes_fail() {
    // Don't double free. (I'm not sure if this exercises the
    // original problem code path anymore.)
    let s = String::from_str("");
    let _bytes = s.as_bytes();
    panic!();
}

#[test]
fn test_as_ptr() {
    let buf = "hello".as_ptr();
    unsafe {
        assert_eq!(*buf.offset(0), b'h');
        assert_eq!(*buf.offset(1), b'e');
        assert_eq!(*buf.offset(2), b'l');
        assert_eq!(*buf.offset(3), b'l');
        assert_eq!(*buf.offset(4), b'o');
    }
}
#[test]
fn test_subslice_offset() {
    let a = "kernelsprite";
    let b = a.slice(7, a.len());
    let c = a.slice(0, a.len() - 6);
    assert_eq!(a.subslice_offset(b), 7);
    assert_eq!(a.subslice_offset(c), 0);

    let string = "a\nb\nc";
    let lines: Vec<&str> = string.lines().collect();
    assert_eq!(string.subslice_offset(lines[0]), 0);
    assert_eq!(string.subslice_offset(lines[1]), 2);
    assert_eq!(string.subslice_offset(lines[2]), 4);
}

#[test]
#[should_fail]
fn test_subslice_offset_2() {
    // `b` is not a subslice of `a`, so this must panic.
    let a = "alchemiter";
    let b = "cruxtruder";
    a.subslice_offset(b);
}
#[test]
fn vec_str_conversions() {
    let s1: String = String::from_str("All mimsy were the borogoves");

    // Round-trip through a byte vector and compare byte-by-byte.
    let v: Vec<u8> = s1.as_bytes().to_vec();
    let s2: String = String::from_str(from_utf8(v.as_slice()).unwrap());
    let mut i: uint = 0u;
    let n1: uint = s1.len();
    let n2: uint = v.len();
    assert_eq!(n1, n2);
    while i < n1 {
        let a: u8 = s1.as_bytes()[i];
        let b: u8 = s2.as_bytes()[i];
        assert_eq!(a, b);
        i += 1u;
    }
}
#[test]
fn test_contains() {
    assert!("abcde".contains("bcd"));
    assert!("abcde".contains("abcd"));
    assert!("abcde".contains("bcde"));
    assert!("abcde".contains(""));
    assert!("".contains(""));
    assert!(!"abcde".contains("def"));
    assert!(!"".contains("a"));

    let data = "ประเทศไทย中华Việt Nam";
    assert!(data.contains("ประเ"));
    assert!(data.contains("ะเ"));
    assert!(data.contains("中华"));
    assert!(!data.contains("ไท华"));
}

#[test]
fn test_contains_char() {
    assert!("abc".contains_char('b'));
    assert!("a".contains_char('a'));
    assert!(!"abc".contains_char('d'));
    assert!(!"".contains_char('a'));
}
#[test]
fn test_char_at() {
    let s = "ศไทย中华Việt Nam";
    let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
    let mut pos = 0;
    for ch in v.iter() {
        assert!(s.char_at(pos) == *ch);
        // Advance by the char's UTF-8 length.
        pos += ch.to_string().len();
    }
}

#[test]
fn test_char_at_reverse() {
    let s = "ศไทย中华Việt Nam";
    let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
    let mut pos = s.len();
    for ch in v.iter().rev() {
        assert!(s.char_at_reverse(pos) == *ch);
        pos -= ch.to_string().len();
    }
}
#[test]
fn test_escape_unicode() {
    // Every char becomes a \u{...} escape.
    assert_eq!("abc".escape_unicode(),
               String::from_str("\\u{61}\\u{62}\\u{63}"));
    assert_eq!("a c".escape_unicode(),
               String::from_str("\\u{61}\\u{20}\\u{63}"));
    assert_eq!("\r\n\t".escape_unicode(),
               String::from_str("\\u{d}\\u{a}\\u{9}"));
    assert_eq!("'\"\\".escape_unicode(),
               String::from_str("\\u{27}\\u{22}\\u{5c}"));
    assert_eq!("\x00\x01\u{fe}\u{ff}".escape_unicode(),
               String::from_str("\\u{0}\\u{1}\\u{fe}\\u{ff}"));
    assert_eq!("\u{100}\u{ffff}".escape_unicode(),
               String::from_str("\\u{100}\\u{ffff}"));
    assert_eq!("\u{10000}\u{10ffff}".escape_unicode(),
               String::from_str("\\u{10000}\\u{10ffff}"));
    assert_eq!("ab\u{fb00}".escape_unicode(),
               String::from_str("\\u{61}\\u{62}\\u{fb00}"));
    assert_eq!("\u{1d4ea}\r".escape_unicode(),
               String::from_str("\\u{1d4ea}\\u{d}"));
}

#[test]
fn test_escape_default() {
    // Printable ASCII passes through; everything else is escaped.
    assert_eq!("abc".escape_default(), String::from_str("abc"));
    assert_eq!("a c".escape_default(), String::from_str("a c"));
    assert_eq!("\r\n\t".escape_default(), String::from_str("\\r\\n\\t"));
    assert_eq!("'\"\\".escape_default(), String::from_str("\\'\\\"\\\\"));
    assert_eq!("\u{100}\u{ffff}".escape_default(),
               String::from_str("\\u{100}\\u{ffff}"));
    assert_eq!("\u{10000}\u{10ffff}".escape_default(),
               String::from_str("\\u{10000}\\u{10ffff}"));
    assert_eq!("ab\u{fb00}".escape_default(),
               String::from_str("ab\\u{fb00}"));
    assert_eq!("\u{1d4ea}\r".escape_default(),
               String::from_str("\\u{1d4ea}\\r"));
}
#[test]
fn test_total_ord() {
    // FIX: these comparisons were bare expression statements whose boolean
    // results were silently discarded — the test asserted nothing. Wrap
    // each in assert! so a wrong Ordering actually fails the test.
    assert!("1234".cmp("123") == Greater);
    assert!("123".cmp("1234") == Less);
    assert!("1234".cmp("1234") == Equal);
    assert!("12345555".cmp("123456") == Less);
    assert!("22".cmp("1234") == Greater);
}

#[test]
fn test_char_range_at() {
    // char_range_at(i) returns the char starting at byte i.
    let data = "b¢€𤭢𤭢€¢b";
    assert_eq!('b', data.char_range_at(0).ch);
    assert_eq!('¢', data.char_range_at(1).ch);
    assert_eq!('€', data.char_range_at(3).ch);
    assert_eq!('𤭢', data.char_range_at(6).ch);
    assert_eq!('𤭢', data.char_range_at(10).ch);
    assert_eq!('€', data.char_range_at(14).ch);
    assert_eq!('¢', data.char_range_at(17).ch);
    assert_eq!('b', data.char_range_at(19).ch);
}

#[test]
fn test_char_range_at_reverse_underflow() {
    assert_eq!("abc".char_range_at_reverse(0).next, 0);
}
#[test]
fn test_iterator() {
    let s = "ศไทย中华Việt Nam";
    let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];

    let mut pos = 0;
    let mut it = s.chars();

    for c in it {
        assert_eq!(c, v[pos]);
        pos += 1;
    }
    assert_eq!(pos, v.len());
}

#[test]
fn test_rev_iterator() {
    let s = "ศไทย中华Việt Nam";
    let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];

    let mut pos = 0;
    let mut it = s.chars().rev();

    for c in it {
        assert_eq!(c, v[pos]);
        pos += 1;
    }
    assert_eq!(pos, v.len());
}
#[test]
fn test_chars_decoding() {
    // Every scalar value must survive an encode/decode round trip forwards.
    let mut bytes = [0u8; 4];
    for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
        let len = c.encode_utf8(&mut bytes).unwrap_or(0);
        let s = ::core::str::from_utf8(bytes[..len]).unwrap();
        if Some(c) != s.chars().next() {
            panic!("character {:x}={} does not decode correctly", c as u32, c);
        }
    }
}

#[test]
fn test_chars_rev_decoding() {
    // ...and backwards, via the reverse iterator.
    let mut bytes = [0u8; 4];
    for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
        let len = c.encode_utf8(&mut bytes).unwrap_or(0);
        let s = ::core::str::from_utf8(bytes[..len]).unwrap();
        if Some(c) != s.chars().rev().next() {
            panic!("character {:x}={} does not decode correctly", c as u32, c);
        }
    }
}
#[test]
fn test_iterator_clone() {
    // A cloned char iterator continues from the same position.
    let s = "ศไทย中华Việt Nam";
    let mut it = s.chars();
    it.next();
    assert!(it.zip(it.clone()).all(|(x,y)| x == y));
}

#[test]
fn test_bytesator() {
    let s = "ศไทย中华Việt Nam";
    let v = [
        224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
        184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
        109
    ];
    let mut pos = 0;

    for b in s.bytes() {
        assert_eq!(b, v[pos]);
        pos += 1;
    }
}

#[test]
fn test_bytes_revator() {
    let s = "ศไทย中华Việt Nam";
    let v = [
        224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
        184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
        109
    ];
    let mut pos = v.len();

    for b in s.bytes().rev() {
        pos -= 1;
        assert_eq!(b, v[pos]);
    }
}
#[test]
fn test_char_indicesator() {
    let s = "ศไทย中华Việt Nam";
    let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
    let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];

    let mut pos = 0;
    let mut it = s.char_indices();

    for c in it {
        assert_eq!(c, (p[pos], v[pos]));
        pos += 1;
    }
    assert_eq!(pos, v.len());
    assert_eq!(pos, p.len());
}

#[test]
fn test_char_indices_revator() {
    let s = "ศไทย中华Việt Nam";
    let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
    let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];

    let mut pos = 0;
    let mut it = s.char_indices().rev();

    for c in it {
        assert_eq!(c, (p[pos], v[pos]));
        pos += 1;
    }
    assert_eq!(pos, v.len());
    assert_eq!(pos, p.len());
}
2224 fn test_splitn_char_iterator() {
2225 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2227 let split: Vec<&str> = data.splitn(3, ' ').collect();
2228 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2230 let split: Vec<&str> = data.splitn(3, |&: c: char| c == ' ').collect();
2231 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2234 let split: Vec<&str> = data.splitn(3, 'ä').collect();
2235 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
2237 let split: Vec<&str> = data.splitn(3, |&: c: char| c == 'ä').collect();
2238 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
2242 fn test_split_char_iterator_no_trailing() {
2243 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2245 let split: Vec<&str> = data.split('\n').collect();
2246 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
2248 let split: Vec<&str> = data.split_terminator('\n').collect();
2249 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
2254 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
2255 let words: Vec<&str> = data.words().collect();
2256 assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
#[test]
fn test_nfd_chars() {
    macro_rules! t {
        ($input: expr, $expected: expr) => {
            assert_eq!($input.nfd_chars().collect::<String>(), $expected);
        }
    }
    t!("\u{1e0b}\u{1c4}", "d\u{307}\u{1c4}");
    t!("\u{2026}", "\u{2026}");
    t!("\u{2126}", "\u{3a9}");
    t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
    t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
    t!("a\u{301}", "a\u{301}");
    t!("\u{301}a", "\u{301}a");
    t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
    t!("\u{ac1c}", "\u{1100}\u{1162}");
}

#[test]
fn test_nfkd_chars() {
    macro_rules! t {
        ($input: expr, $expected: expr) => {
            assert_eq!($input.nfkd_chars().collect::<String>(), $expected);
        }
    }
    t!("\u{1e0b}\u{1c4}", "d\u{307}DZ\u{30c}");
    t!("\u{2026}", "...");
    t!("\u{2126}", "\u{3a9}");
    t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
    t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
    t!("a\u{301}", "a\u{301}");
    t!("\u{301}a", "\u{301}a");
    t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
    t!("\u{ac1c}", "\u{1100}\u{1162}");
}

#[test]
fn test_nfc_chars() {
    macro_rules! t {
        ($input: expr, $expected: expr) => {
            assert_eq!($input.nfc_chars().collect::<String>(), $expected);
        }
    }
    t!("\u{1e0b}\u{1c4}", "\u{1e0b}\u{1c4}");
    t!("\u{2026}", "\u{2026}");
    t!("\u{2126}", "\u{3a9}");
    t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
    t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
    t!("a\u{301}", "\u{e1}");
    t!("\u{301}a", "\u{301}a");
    t!("\u{d4db}", "\u{d4db}");
    t!("\u{ac1c}", "\u{ac1c}");
    t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
}

#[test]
fn test_nfkc_chars() {
    macro_rules! t {
        ($input: expr, $expected: expr) => {
            assert_eq!($input.nfkc_chars().collect::<String>(), $expected);
        }
    }
    t!("\u{1e0b}\u{1c4}", "\u{1e0b}D\u{17d}");
    t!("\u{2026}", "...");
    t!("\u{2126}", "\u{3a9}");
    t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
    t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
    t!("a\u{301}", "\u{e1}");
    t!("\u{301}a", "\u{301}a");
    t!("\u{d4db}", "\u{d4db}");
    t!("\u{ac1c}", "\u{ac1c}");
    t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
}
#[test]
fn test_lines() {
    let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
    let lines: Vec<&str> = data.lines().collect();
    assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);

    let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
    let lines: Vec<&str> = data.lines().collect();
    assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
}
2349 fn test_graphemes() {
2350 use core::iter::order;
2351 // official Unicode test data
2352 // from http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
2353 let test_same: [(_, &[_]); 325] = [
2354 ("\u{20}\u{20}", &["\u{20}", "\u{20}"]),
2355 ("\u{20}\u{308}\u{20}", &["\u{20}\u{308}", "\u{20}"]),
2356 ("\u{20}\u{D}", &["\u{20}", "\u{D}"]),
2357 ("\u{20}\u{308}\u{D}", &["\u{20}\u{308}", "\u{D}"]),
2358 ("\u{20}\u{A}", &["\u{20}", "\u{A}"]),
2359 ("\u{20}\u{308}\u{A}", &["\u{20}\u{308}", "\u{A}"]),
2360 ("\u{20}\u{1}", &["\u{20}", "\u{1}"]),
2361 ("\u{20}\u{308}\u{1}", &["\u{20}\u{308}", "\u{1}"]),
2362 ("\u{20}\u{300}", &["\u{20}\u{300}"]),
2363 ("\u{20}\u{308}\u{300}", &["\u{20}\u{308}\u{300}"]),
2364 ("\u{20}\u{1100}", &["\u{20}", "\u{1100}"]),
2365 ("\u{20}\u{308}\u{1100}", &["\u{20}\u{308}", "\u{1100}"]),
2366 ("\u{20}\u{1160}", &["\u{20}", "\u{1160}"]),
2367 ("\u{20}\u{308}\u{1160}", &["\u{20}\u{308}", "\u{1160}"]),
2368 ("\u{20}\u{11A8}", &["\u{20}", "\u{11A8}"]),
2369 ("\u{20}\u{308}\u{11A8}", &["\u{20}\u{308}", "\u{11A8}"]),
2370 ("\u{20}\u{AC00}", &["\u{20}", "\u{AC00}"]),
2371 ("\u{20}\u{308}\u{AC00}", &["\u{20}\u{308}", "\u{AC00}"]),
2372 ("\u{20}\u{AC01}", &["\u{20}", "\u{AC01}"]),
2373 ("\u{20}\u{308}\u{AC01}", &["\u{20}\u{308}", "\u{AC01}"]),
2374 ("\u{20}\u{1F1E6}", &["\u{20}", "\u{1F1E6}"]),
2375 ("\u{20}\u{308}\u{1F1E6}", &["\u{20}\u{308}", "\u{1F1E6}"]),
2376 ("\u{20}\u{378}", &["\u{20}", "\u{378}"]),
2377 ("\u{20}\u{308}\u{378}", &["\u{20}\u{308}", "\u{378}"]),
2378 ("\u{D}\u{20}", &["\u{D}", "\u{20}"]),
2379 ("\u{D}\u{308}\u{20}", &["\u{D}", "\u{308}", "\u{20}"]),
2380 ("\u{D}\u{D}", &["\u{D}", "\u{D}"]),
2381 ("\u{D}\u{308}\u{D}", &["\u{D}", "\u{308}", "\u{D}"]),
2382 ("\u{D}\u{A}", &["\u{D}\u{A}"]),
2383 ("\u{D}\u{308}\u{A}", &["\u{D}", "\u{308}", "\u{A}"]),
2384 ("\u{D}\u{1}", &["\u{D}", "\u{1}"]),
2385 ("\u{D}\u{308}\u{1}", &["\u{D}", "\u{308}", "\u{1}"]),
2386 ("\u{D}\u{300}", &["\u{D}", "\u{300}"]),
2387 ("\u{D}\u{308}\u{300}", &["\u{D}", "\u{308}\u{300}"]),
2388 ("\u{D}\u{903}", &["\u{D}", "\u{903}"]),
2389 ("\u{D}\u{1100}", &["\u{D}", "\u{1100}"]),
2390 ("\u{D}\u{308}\u{1100}", &["\u{D}", "\u{308}", "\u{1100}"]),
2391 ("\u{D}\u{1160}", &["\u{D}", "\u{1160}"]),
2392 ("\u{D}\u{308}\u{1160}", &["\u{D}", "\u{308}", "\u{1160}"]),
2393 ("\u{D}\u{11A8}", &["\u{D}", "\u{11A8}"]),
2394 ("\u{D}\u{308}\u{11A8}", &["\u{D}", "\u{308}", "\u{11A8}"]),
2395 ("\u{D}\u{AC00}", &["\u{D}", "\u{AC00}"]),
2396 ("\u{D}\u{308}\u{AC00}", &["\u{D}", "\u{308}", "\u{AC00}"]),
2397 ("\u{D}\u{AC01}", &["\u{D}", "\u{AC01}"]),
2398 ("\u{D}\u{308}\u{AC01}", &["\u{D}", "\u{308}", "\u{AC01}"]),
2399 ("\u{D}\u{1F1E6}", &["\u{D}", "\u{1F1E6}"]),
2400 ("\u{D}\u{308}\u{1F1E6}", &["\u{D}", "\u{308}", "\u{1F1E6}"]),
2401 ("\u{D}\u{378}", &["\u{D}", "\u{378}"]),
2402 ("\u{D}\u{308}\u{378}", &["\u{D}", "\u{308}", "\u{378}"]),
2403 ("\u{A}\u{20}", &["\u{A}", "\u{20}"]),
2404 ("\u{A}\u{308}\u{20}", &["\u{A}", "\u{308}", "\u{20}"]),
2405 ("\u{A}\u{D}", &["\u{A}", "\u{D}"]),
2406 ("\u{A}\u{308}\u{D}", &["\u{A}", "\u{308}", "\u{D}"]),
2407 ("\u{A}\u{A}", &["\u{A}", "\u{A}"]),
2408 ("\u{A}\u{308}\u{A}", &["\u{A}", "\u{308}", "\u{A}"]),
2409 ("\u{A}\u{1}", &["\u{A}", "\u{1}"]),
2410 ("\u{A}\u{308}\u{1}", &["\u{A}", "\u{308}", "\u{1}"]),
2411 ("\u{A}\u{300}", &["\u{A}", "\u{300}"]),
2412 ("\u{A}\u{308}\u{300}", &["\u{A}", "\u{308}\u{300}"]),
2413 ("\u{A}\u{903}", &["\u{A}", "\u{903}"]),
2414 ("\u{A}\u{1100}", &["\u{A}", "\u{1100}"]),
2415 ("\u{A}\u{308}\u{1100}", &["\u{A}", "\u{308}", "\u{1100}"]),
2416 ("\u{A}\u{1160}", &["\u{A}", "\u{1160}"]),
2417 ("\u{A}\u{308}\u{1160}", &["\u{A}", "\u{308}", "\u{1160}"]),
2418 ("\u{A}\u{11A8}", &["\u{A}", "\u{11A8}"]),
2419 ("\u{A}\u{308}\u{11A8}", &["\u{A}", "\u{308}", "\u{11A8}"]),
2420 ("\u{A}\u{AC00}", &["\u{A}", "\u{AC00}"]),
2421 ("\u{A}\u{308}\u{AC00}", &["\u{A}", "\u{308}", "\u{AC00}"]),
2422 ("\u{A}\u{AC01}", &["\u{A}", "\u{AC01}"]),
2423 ("\u{A}\u{308}\u{AC01}", &["\u{A}", "\u{308}", "\u{AC01}"]),
2424 ("\u{A}\u{1F1E6}", &["\u{A}", "\u{1F1E6}"]),
2425 ("\u{A}\u{308}\u{1F1E6}", &["\u{A}", "\u{308}", "\u{1F1E6}"]),
2426 ("\u{A}\u{378}", &["\u{A}", "\u{378}"]),
2427 ("\u{A}\u{308}\u{378}", &["\u{A}", "\u{308}", "\u{378}"]),
2428 ("\u{1}\u{20}", &["\u{1}", "\u{20}"]),
2429 ("\u{1}\u{308}\u{20}", &["\u{1}", "\u{308}", "\u{20}"]),
2430 ("\u{1}\u{D}", &["\u{1}", "\u{D}"]),
2431 ("\u{1}\u{308}\u{D}", &["\u{1}", "\u{308}", "\u{D}"]),
2432 ("\u{1}\u{A}", &["\u{1}", "\u{A}"]),
2433 ("\u{1}\u{308}\u{A}", &["\u{1}", "\u{308}", "\u{A}"]),
2434 ("\u{1}\u{1}", &["\u{1}", "\u{1}"]),
2435 ("\u{1}\u{308}\u{1}", &["\u{1}", "\u{308}", "\u{1}"]),
2436 ("\u{1}\u{300}", &["\u{1}", "\u{300}"]),
2437 ("\u{1}\u{308}\u{300}", &["\u{1}", "\u{308}\u{300}"]),
2438 ("\u{1}\u{903}", &["\u{1}", "\u{903}"]),
2439 ("\u{1}\u{1100}", &["\u{1}", "\u{1100}"]),
2440 ("\u{1}\u{308}\u{1100}", &["\u{1}", "\u{308}", "\u{1100}"]),
2441 ("\u{1}\u{1160}", &["\u{1}", "\u{1160}"]),
2442 ("\u{1}\u{308}\u{1160}", &["\u{1}", "\u{308}", "\u{1160}"]),
2443 ("\u{1}\u{11A8}", &["\u{1}", "\u{11A8}"]),
2444 ("\u{1}\u{308}\u{11A8}", &["\u{1}", "\u{308}", "\u{11A8}"]),
2445 ("\u{1}\u{AC00}", &["\u{1}", "\u{AC00}"]),
2446 ("\u{1}\u{308}\u{AC00}", &["\u{1}", "\u{308}", "\u{AC00}"]),
2447 ("\u{1}\u{AC01}", &["\u{1}", "\u{AC01}"]),
2448 ("\u{1}\u{308}\u{AC01}", &["\u{1}", "\u{308}", "\u{AC01}"]),
2449 ("\u{1}\u{1F1E6}", &["\u{1}", "\u{1F1E6}"]),
2450 ("\u{1}\u{308}\u{1F1E6}", &["\u{1}", "\u{308}", "\u{1F1E6}"]),
2451 ("\u{1}\u{378}", &["\u{1}", "\u{378}"]),
2452 ("\u{1}\u{308}\u{378}", &["\u{1}", "\u{308}", "\u{378}"]),
2453 ("\u{300}\u{20}", &["\u{300}", "\u{20}"]),
2454 ("\u{300}\u{308}\u{20}", &["\u{300}\u{308}", "\u{20}"]),
2455 ("\u{300}\u{D}", &["\u{300}", "\u{D}"]),
2456 ("\u{300}\u{308}\u{D}", &["\u{300}\u{308}", "\u{D}"]),
2457 ("\u{300}\u{A}", &["\u{300}", "\u{A}"]),
2458 ("\u{300}\u{308}\u{A}", &["\u{300}\u{308}", "\u{A}"]),
2459 ("\u{300}\u{1}", &["\u{300}", "\u{1}"]),
2460 ("\u{300}\u{308}\u{1}", &["\u{300}\u{308}", "\u{1}"]),
2461 ("\u{300}\u{300}", &["\u{300}\u{300}"]),
2462 ("\u{300}\u{308}\u{300}", &["\u{300}\u{308}\u{300}"]),
2463 ("\u{300}\u{1100}", &["\u{300}", "\u{1100}"]),
2464 ("\u{300}\u{308}\u{1100}", &["\u{300}\u{308}", "\u{1100}"]),
2465 ("\u{300}\u{1160}", &["\u{300}", "\u{1160}"]),
2466 ("\u{300}\u{308}\u{1160}", &["\u{300}\u{308}", "\u{1160}"]),
2467 ("\u{300}\u{11A8}", &["\u{300}", "\u{11A8}"]),
2468 ("\u{300}\u{308}\u{11A8}", &["\u{300}\u{308}", "\u{11A8}"]),
2469 ("\u{300}\u{AC00}", &["\u{300}", "\u{AC00}"]),
2470 ("\u{300}\u{308}\u{AC00}", &["\u{300}\u{308}", "\u{AC00}"]),
2471 ("\u{300}\u{AC01}", &["\u{300}", "\u{AC01}"]),
2472 ("\u{300}\u{308}\u{AC01}", &["\u{300}\u{308}", "\u{AC01}"]),
2473 ("\u{300}\u{1F1E6}", &["\u{300}", "\u{1F1E6}"]),
2474 ("\u{300}\u{308}\u{1F1E6}", &["\u{300}\u{308}", "\u{1F1E6}"]),
2475 ("\u{300}\u{378}", &["\u{300}", "\u{378}"]),
2476 ("\u{300}\u{308}\u{378}", &["\u{300}\u{308}", "\u{378}"]),
2477 ("\u{903}\u{20}", &["\u{903}", "\u{20}"]),
2478 ("\u{903}\u{308}\u{20}", &["\u{903}\u{308}", "\u{20}"]),
2479 ("\u{903}\u{D}", &["\u{903}", "\u{D}"]),
2480 ("\u{903}\u{308}\u{D}", &["\u{903}\u{308}", "\u{D}"]),
2481 ("\u{903}\u{A}", &["\u{903}", "\u{A}"]),
2482 ("\u{903}\u{308}\u{A}", &["\u{903}\u{308}", "\u{A}"]),
2483 ("\u{903}\u{1}", &["\u{903}", "\u{1}"]),
2484 ("\u{903}\u{308}\u{1}", &["\u{903}\u{308}", "\u{1}"]),
2485 ("\u{903}\u{300}", &["\u{903}\u{300}"]),
2486 ("\u{903}\u{308}\u{300}", &["\u{903}\u{308}\u{300}"]),
2487 ("\u{903}\u{1100}", &["\u{903}", "\u{1100}"]),
2488 ("\u{903}\u{308}\u{1100}", &["\u{903}\u{308}", "\u{1100}"]),
2489 ("\u{903}\u{1160}", &["\u{903}", "\u{1160}"]),
2490 ("\u{903}\u{308}\u{1160}", &["\u{903}\u{308}", "\u{1160}"]),
2491 ("\u{903}\u{11A8}", &["\u{903}", "\u{11A8}"]),
2492 ("\u{903}\u{308}\u{11A8}", &["\u{903}\u{308}", "\u{11A8}"]),
2493 ("\u{903}\u{AC00}", &["\u{903}", "\u{AC00}"]),
2494 ("\u{903}\u{308}\u{AC00}", &["\u{903}\u{308}", "\u{AC00}"]),
2495 ("\u{903}\u{AC01}", &["\u{903}", "\u{AC01}"]),
2496 ("\u{903}\u{308}\u{AC01}", &["\u{903}\u{308}", "\u{AC01}"]),
2497 ("\u{903}\u{1F1E6}", &["\u{903}", "\u{1F1E6}"]),
2498 ("\u{903}\u{308}\u{1F1E6}", &["\u{903}\u{308}", "\u{1F1E6}"]),
2499 ("\u{903}\u{378}", &["\u{903}", "\u{378}"]),
2500 ("\u{903}\u{308}\u{378}", &["\u{903}\u{308}", "\u{378}"]),
2501 ("\u{1100}\u{20}", &["\u{1100}", "\u{20}"]),
2502 ("\u{1100}\u{308}\u{20}", &["\u{1100}\u{308}", "\u{20}"]),
2503 ("\u{1100}\u{D}", &["\u{1100}", "\u{D}"]),
2504 ("\u{1100}\u{308}\u{D}", &["\u{1100}\u{308}", "\u{D}"]),
2505 ("\u{1100}\u{A}", &["\u{1100}", "\u{A}"]),
2506 ("\u{1100}\u{308}\u{A}", &["\u{1100}\u{308}", "\u{A}"]),
2507 ("\u{1100}\u{1}", &["\u{1100}", "\u{1}"]),
2508 ("\u{1100}\u{308}\u{1}", &["\u{1100}\u{308}", "\u{1}"]),
2509 ("\u{1100}\u{300}", &["\u{1100}\u{300}"]),
2510 ("\u{1100}\u{308}\u{300}", &["\u{1100}\u{308}\u{300}"]),
2511 ("\u{1100}\u{1100}", &["\u{1100}\u{1100}"]),
2512 ("\u{1100}\u{308}\u{1100}", &["\u{1100}\u{308}", "\u{1100}"]),
2513 ("\u{1100}\u{1160}", &["\u{1100}\u{1160}"]),
2514 ("\u{1100}\u{308}\u{1160}", &["\u{1100}\u{308}", "\u{1160}"]),
2515 ("\u{1100}\u{11A8}", &["\u{1100}", "\u{11A8}"]),
2516 ("\u{1100}\u{308}\u{11A8}", &["\u{1100}\u{308}", "\u{11A8}"]),
2517 ("\u{1100}\u{AC00}", &["\u{1100}\u{AC00}"]),
2518 ("\u{1100}\u{308}\u{AC00}", &["\u{1100}\u{308}", "\u{AC00}"]),
2519 ("\u{1100}\u{AC01}", &["\u{1100}\u{AC01}"]),
2520 ("\u{1100}\u{308}\u{AC01}", &["\u{1100}\u{308}", "\u{AC01}"]),
2521 ("\u{1100}\u{1F1E6}", &["\u{1100}", "\u{1F1E6}"]),
2522 ("\u{1100}\u{308}\u{1F1E6}", &["\u{1100}\u{308}", "\u{1F1E6}"]),
2523 ("\u{1100}\u{378}", &["\u{1100}", "\u{378}"]),
2524 ("\u{1100}\u{308}\u{378}", &["\u{1100}\u{308}", "\u{378}"]),
2525 ("\u{1160}\u{20}", &["\u{1160}", "\u{20}"]),
2526 ("\u{1160}\u{308}\u{20}", &["\u{1160}\u{308}", "\u{20}"]),
2527 ("\u{1160}\u{D}", &["\u{1160}", "\u{D}"]),
2528 ("\u{1160}\u{308}\u{D}", &["\u{1160}\u{308}", "\u{D}"]),
2529 ("\u{1160}\u{A}", &["\u{1160}", "\u{A}"]),
2530 ("\u{1160}\u{308}\u{A}", &["\u{1160}\u{308}", "\u{A}"]),
2531 ("\u{1160}\u{1}", &["\u{1160}", "\u{1}"]),
2532 ("\u{1160}\u{308}\u{1}", &["\u{1160}\u{308}", "\u{1}"]),
2533 ("\u{1160}\u{300}", &["\u{1160}\u{300}"]),
2534 ("\u{1160}\u{308}\u{300}", &["\u{1160}\u{308}\u{300}"]),
2535 ("\u{1160}\u{1100}", &["\u{1160}", "\u{1100}"]),
2536 ("\u{1160}\u{308}\u{1100}", &["\u{1160}\u{308}", "\u{1100}"]),
2537 ("\u{1160}\u{1160}", &["\u{1160}\u{1160}"]),
2538 ("\u{1160}\u{308}\u{1160}", &["\u{1160}\u{308}", "\u{1160}"]),
2539 ("\u{1160}\u{11A8}", &["\u{1160}\u{11A8}"]),
2540 ("\u{1160}\u{308}\u{11A8}", &["\u{1160}\u{308}", "\u{11A8}"]),
2541 ("\u{1160}\u{AC00}", &["\u{1160}", "\u{AC00}"]),
2542 ("\u{1160}\u{308}\u{AC00}", &["\u{1160}\u{308}", "\u{AC00}"]),
2543 ("\u{1160}\u{AC01}", &["\u{1160}", "\u{AC01}"]),
2544 ("\u{1160}\u{308}\u{AC01}", &["\u{1160}\u{308}", "\u{AC01}"]),
2545 ("\u{1160}\u{1F1E6}", &["\u{1160}", "\u{1F1E6}"]),
2546 ("\u{1160}\u{308}\u{1F1E6}", &["\u{1160}\u{308}", "\u{1F1E6}"]),
2547 ("\u{1160}\u{378}", &["\u{1160}", "\u{378}"]),
2548 ("\u{1160}\u{308}\u{378}", &["\u{1160}\u{308}", "\u{378}"]),
2549 ("\u{11A8}\u{20}", &["\u{11A8}", "\u{20}"]),
2550 ("\u{11A8}\u{308}\u{20}", &["\u{11A8}\u{308}", "\u{20}"]),
2551 ("\u{11A8}\u{D}", &["\u{11A8}", "\u{D}"]),
2552 ("\u{11A8}\u{308}\u{D}", &["\u{11A8}\u{308}", "\u{D}"]),
2553 ("\u{11A8}\u{A}", &["\u{11A8}", "\u{A}"]),
2554 ("\u{11A8}\u{308}\u{A}", &["\u{11A8}\u{308}", "\u{A}"]),
2555 ("\u{11A8}\u{1}", &["\u{11A8}", "\u{1}"]),
2556 ("\u{11A8}\u{308}\u{1}", &["\u{11A8}\u{308}", "\u{1}"]),
2557 ("\u{11A8}\u{300}", &["\u{11A8}\u{300}"]),
2558 ("\u{11A8}\u{308}\u{300}", &["\u{11A8}\u{308}\u{300}"]),
2559 ("\u{11A8}\u{1100}", &["\u{11A8}", "\u{1100}"]),
2560 ("\u{11A8}\u{308}\u{1100}", &["\u{11A8}\u{308}", "\u{1100}"]),
2561 ("\u{11A8}\u{1160}", &["\u{11A8}", "\u{1160}"]),
2562 ("\u{11A8}\u{308}\u{1160}", &["\u{11A8}\u{308}", "\u{1160}"]),
2563 ("\u{11A8}\u{11A8}", &["\u{11A8}\u{11A8}"]),
2564 ("\u{11A8}\u{308}\u{11A8}", &["\u{11A8}\u{308}", "\u{11A8}"]),
2565 ("\u{11A8}\u{AC00}", &["\u{11A8}", "\u{AC00}"]),
2566 ("\u{11A8}\u{308}\u{AC00}", &["\u{11A8}\u{308}", "\u{AC00}"]),
2567 ("\u{11A8}\u{AC01}", &["\u{11A8}", "\u{AC01}"]),
2568 ("\u{11A8}\u{308}\u{AC01}", &["\u{11A8}\u{308}", "\u{AC01}"]),
2569 ("\u{11A8}\u{1F1E6}", &["\u{11A8}", "\u{1F1E6}"]),
2570 ("\u{11A8}\u{308}\u{1F1E6}", &["\u{11A8}\u{308}", "\u{1F1E6}"]),
2571 ("\u{11A8}\u{378}", &["\u{11A8}", "\u{378}"]),
2572 ("\u{11A8}\u{308}\u{378}", &["\u{11A8}\u{308}", "\u{378}"]),
2573 ("\u{AC00}\u{20}", &["\u{AC00}", "\u{20}"]),
2574 ("\u{AC00}\u{308}\u{20}", &["\u{AC00}\u{308}", "\u{20}"]),
2575 ("\u{AC00}\u{D}", &["\u{AC00}", "\u{D}"]),
2576 ("\u{AC00}\u{308}\u{D}", &["\u{AC00}\u{308}", "\u{D}"]),
2577 ("\u{AC00}\u{A}", &["\u{AC00}", "\u{A}"]),
2578 ("\u{AC00}\u{308}\u{A}", &["\u{AC00}\u{308}", "\u{A}"]),
2579 ("\u{AC00}\u{1}", &["\u{AC00}", "\u{1}"]),
2580 ("\u{AC00}\u{308}\u{1}", &["\u{AC00}\u{308}", "\u{1}"]),
2581 ("\u{AC00}\u{300}", &["\u{AC00}\u{300}"]),
2582 ("\u{AC00}\u{308}\u{300}", &["\u{AC00}\u{308}\u{300}"]),
2583 ("\u{AC00}\u{1100}", &["\u{AC00}", "\u{1100}"]),
2584 ("\u{AC00}\u{308}\u{1100}", &["\u{AC00}\u{308}", "\u{1100}"]),
2585 ("\u{AC00}\u{1160}", &["\u{AC00}\u{1160}"]),
2586 ("\u{AC00}\u{308}\u{1160}", &["\u{AC00}\u{308}", "\u{1160}"]),
2587 ("\u{AC00}\u{11A8}", &["\u{AC00}\u{11A8}"]),
2588 ("\u{AC00}\u{308}\u{11A8}", &["\u{AC00}\u{308}", "\u{11A8}"]),
2589 ("\u{AC00}\u{AC00}", &["\u{AC00}", "\u{AC00}"]),
2590 ("\u{AC00}\u{308}\u{AC00}", &["\u{AC00}\u{308}", "\u{AC00}"]),
2591 ("\u{AC00}\u{AC01}", &["\u{AC00}", "\u{AC01}"]),
2592 ("\u{AC00}\u{308}\u{AC01}", &["\u{AC00}\u{308}", "\u{AC01}"]),
2593 ("\u{AC00}\u{1F1E6}", &["\u{AC00}", "\u{1F1E6}"]),
2594 ("\u{AC00}\u{308}\u{1F1E6}", &["\u{AC00}\u{308}", "\u{1F1E6}"]),
2595 ("\u{AC00}\u{378}", &["\u{AC00}", "\u{378}"]),
2596 ("\u{AC00}\u{308}\u{378}", &["\u{AC00}\u{308}", "\u{378}"]),
2597 ("\u{AC01}\u{20}", &["\u{AC01}", "\u{20}"]),
2598 ("\u{AC01}\u{308}\u{20}", &["\u{AC01}\u{308}", "\u{20}"]),
2599 ("\u{AC01}\u{D}", &["\u{AC01}", "\u{D}"]),
2600 ("\u{AC01}\u{308}\u{D}", &["\u{AC01}\u{308}", "\u{D}"]),
2601 ("\u{AC01}\u{A}", &["\u{AC01}", "\u{A}"]),
2602 ("\u{AC01}\u{308}\u{A}", &["\u{AC01}\u{308}", "\u{A}"]),
2603 ("\u{AC01}\u{1}", &["\u{AC01}", "\u{1}"]),
2604 ("\u{AC01}\u{308}\u{1}", &["\u{AC01}\u{308}", "\u{1}"]),
2605 ("\u{AC01}\u{300}", &["\u{AC01}\u{300}"]),
2606 ("\u{AC01}\u{308}\u{300}", &["\u{AC01}\u{308}\u{300}"]),
2607 ("\u{AC01}\u{1100}", &["\u{AC01}", "\u{1100}"]),
2608 ("\u{AC01}\u{308}\u{1100}", &["\u{AC01}\u{308}", "\u{1100}"]),
2609 ("\u{AC01}\u{1160}", &["\u{AC01}", "\u{1160}"]),
2610 ("\u{AC01}\u{308}\u{1160}", &["\u{AC01}\u{308}", "\u{1160}"]),
2611 ("\u{AC01}\u{11A8}", &["\u{AC01}\u{11A8}"]),
2612 ("\u{AC01}\u{308}\u{11A8}", &["\u{AC01}\u{308}", "\u{11A8}"]),
2613 ("\u{AC01}\u{AC00}", &["\u{AC01}", "\u{AC00}"]),
2614 ("\u{AC01}\u{308}\u{AC00}", &["\u{AC01}\u{308}", "\u{AC00}"]),
2615 ("\u{AC01}\u{AC01}", &["\u{AC01}", "\u{AC01}"]),
2616 ("\u{AC01}\u{308}\u{AC01}", &["\u{AC01}\u{308}", "\u{AC01}"]),
2617 ("\u{AC01}\u{1F1E6}", &["\u{AC01}", "\u{1F1E6}"]),
2618 ("\u{AC01}\u{308}\u{1F1E6}", &["\u{AC01}\u{308}", "\u{1F1E6}"]),
2619 ("\u{AC01}\u{378}", &["\u{AC01}", "\u{378}"]),
2620 ("\u{AC01}\u{308}\u{378}", &["\u{AC01}\u{308}", "\u{378}"]),
2621 ("\u{1F1E6}\u{20}", &["\u{1F1E6}", "\u{20}"]),
2622 ("\u{1F1E6}\u{308}\u{20}", &["\u{1F1E6}\u{308}", "\u{20}"]),
2623 ("\u{1F1E6}\u{D}", &["\u{1F1E6}", "\u{D}"]),
2624 ("\u{1F1E6}\u{308}\u{D}", &["\u{1F1E6}\u{308}", "\u{D}"]),
2625 ("\u{1F1E6}\u{A}", &["\u{1F1E6}", "\u{A}"]),
2626 ("\u{1F1E6}\u{308}\u{A}", &["\u{1F1E6}\u{308}", "\u{A}"]),
2627 ("\u{1F1E6}\u{1}", &["\u{1F1E6}", "\u{1}"]),
2628 ("\u{1F1E6}\u{308}\u{1}", &["\u{1F1E6}\u{308}", "\u{1}"]),
2629 ("\u{1F1E6}\u{300}", &["\u{1F1E6}\u{300}"]),
2630 ("\u{1F1E6}\u{308}\u{300}", &["\u{1F1E6}\u{308}\u{300}"]),
2631 ("\u{1F1E6}\u{1100}", &["\u{1F1E6}", "\u{1100}"]),
2632 ("\u{1F1E6}\u{308}\u{1100}", &["\u{1F1E6}\u{308}", "\u{1100}"]),
2633 ("\u{1F1E6}\u{1160}", &["\u{1F1E6}", "\u{1160}"]),
2634 ("\u{1F1E6}\u{308}\u{1160}", &["\u{1F1E6}\u{308}", "\u{1160}"]),
2635 ("\u{1F1E6}\u{11A8}", &["\u{1F1E6}", "\u{11A8}"]),
2636 ("\u{1F1E6}\u{308}\u{11A8}", &["\u{1F1E6}\u{308}", "\u{11A8}"]),
2637 ("\u{1F1E6}\u{AC00}", &["\u{1F1E6}", "\u{AC00}"]),
2638 ("\u{1F1E6}\u{308}\u{AC00}", &["\u{1F1E6}\u{308}", "\u{AC00}"]),
2639 ("\u{1F1E6}\u{AC01}", &["\u{1F1E6}", "\u{AC01}"]),
2640 ("\u{1F1E6}\u{308}\u{AC01}", &["\u{1F1E6}\u{308}", "\u{AC01}"]),
2641 ("\u{1F1E6}\u{1F1E6}", &["\u{1F1E6}\u{1F1E6}"]),
2642 ("\u{1F1E6}\u{308}\u{1F1E6}", &["\u{1F1E6}\u{308}", "\u{1F1E6}"]),
2643 ("\u{1F1E6}\u{378}", &["\u{1F1E6}", "\u{378}"]),
2644 ("\u{1F1E6}\u{308}\u{378}", &["\u{1F1E6}\u{308}", "\u{378}"]),
2645 ("\u{378}\u{20}", &["\u{378}", "\u{20}"]),
2646 ("\u{378}\u{308}\u{20}", &["\u{378}\u{308}", "\u{20}"]),
2647 ("\u{378}\u{D}", &["\u{378}", "\u{D}"]),
2648 ("\u{378}\u{308}\u{D}", &["\u{378}\u{308}", "\u{D}"]),
2649 ("\u{378}\u{A}", &["\u{378}", "\u{A}"]),
2650 ("\u{378}\u{308}\u{A}", &["\u{378}\u{308}", "\u{A}"]),
2651 ("\u{378}\u{1}", &["\u{378}", "\u{1}"]),
2652 ("\u{378}\u{308}\u{1}", &["\u{378}\u{308}", "\u{1}"]),
2653 ("\u{378}\u{300}", &["\u{378}\u{300}"]),
2654 ("\u{378}\u{308}\u{300}", &["\u{378}\u{308}\u{300}"]),
2655 ("\u{378}\u{1100}", &["\u{378}", "\u{1100}"]),
2656 ("\u{378}\u{308}\u{1100}", &["\u{378}\u{308}", "\u{1100}"]),
2657 ("\u{378}\u{1160}", &["\u{378}", "\u{1160}"]),
2658 ("\u{378}\u{308}\u{1160}", &["\u{378}\u{308}", "\u{1160}"]),
2659 ("\u{378}\u{11A8}", &["\u{378}", "\u{11A8}"]),
2660 ("\u{378}\u{308}\u{11A8}", &["\u{378}\u{308}", "\u{11A8}"]),
2661 ("\u{378}\u{AC00}", &["\u{378}", "\u{AC00}"]),
2662 ("\u{378}\u{308}\u{AC00}", &["\u{378}\u{308}", "\u{AC00}"]),
2663 ("\u{378}\u{AC01}", &["\u{378}", "\u{AC01}"]),
2664 ("\u{378}\u{308}\u{AC01}", &["\u{378}\u{308}", "\u{AC01}"]),
2665 ("\u{378}\u{1F1E6}", &["\u{378}", "\u{1F1E6}"]),
2666 ("\u{378}\u{308}\u{1F1E6}", &["\u{378}\u{308}", "\u{1F1E6}"]),
2667 ("\u{378}\u{378}", &["\u{378}", "\u{378}"]),
2668 ("\u{378}\u{308}\u{378}", &["\u{378}\u{308}", "\u{378}"]),
// Regional Indicator (flag) and join-control cases.
2669 ("\u{61}\u{1F1E6}\u{62}", &["\u{61}", "\u{1F1E6}", "\u{62}"]),
// Runs of consecutive Regional Indicator symbols (U+1F1E6..) stay in one cluster
// here, even three or four in a row.
// NOTE(review): later UAX #29 revisions pair RIs two at a time; this data
// reflects the segmentation rules in force when the table was generated — confirm.
2670 ("\u{1F1F7}\u{1F1FA}", &["\u{1F1F7}\u{1F1FA}"]),
2671 ("\u{1F1F7}\u{1F1FA}\u{1F1F8}", &["\u{1F1F7}\u{1F1FA}\u{1F1F8}"]),
2672 ("\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}",
2673 &["\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}"]),
// U+200B ZERO WIDTH SPACE interrupts the Regional Indicator run.
2674 ("\u{1F1F7}\u{1F1FA}\u{200B}\u{1F1F8}\u{1F1EA}",
2675 &["\u{1F1F7}\u{1F1FA}", "\u{200B}", "\u{1F1F8}\u{1F1EA}"]),
2676 ("\u{1F1E6}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{1F1E7}\u{1F1E8}"]),
// U+200D ZERO WIDTH JOINER attaches to the preceding cluster and ends it;
// the Regional Indicators that follow start a fresh cluster.
2677 ("\u{1F1E6}\u{200D}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{200D}",
2678 "\u{1F1E7}\u{1F1E8}"]),
2679 ("\u{1F1E6}\u{1F1E7}\u{200D}\u{1F1E8}",
2680 &["\u{1F1E6}\u{1F1E7}\u{200D}", "\u{1F1E8}"]),
2681 ("\u{20}\u{200D}\u{646}", &["\u{20}\u{200D}", "\u{646}"]),
2682 ("\u{646}\u{200D}\u{20}", &["\u{646}\u{200D}", "\u{20}"]),
// Sequences built around U+0903 DEVANAGARI SIGN VISARGA (a SpacingMark) where
// the two segmentation modes disagree. Each triple is:
//   (input, expected clusters for graphemes(true), expected for graphemes(false))
// — see the loop below that destructures `(s, gt, gf)`.
// NOTE(review): graphemes(true) appears to keep the SpacingMark attached to the
// preceding cluster (extended clusters, UAX #29 GB9a) while graphemes(false)
// splits it off — confirm against the iterator's documentation.
2685 let test_diff: [(_, &[_], &[_]); 23] = [
2686 ("\u{20}\u{903}", &["\u{20}\u{903}"], &["\u{20}", "\u{903}"]), ("\u{20}\u{308}\u{903}",
2687 &["\u{20}\u{308}\u{903}"], &["\u{20}\u{308}", "\u{903}"]), ("\u{D}\u{308}\u{903}",
2688 &["\u{D}", "\u{308}\u{903}"], &["\u{D}", "\u{308}", "\u{903}"]), ("\u{A}\u{308}\u{903}",
2689 &["\u{A}", "\u{308}\u{903}"], &["\u{A}", "\u{308}", "\u{903}"]), ("\u{1}\u{308}\u{903}",
2690 &["\u{1}", "\u{308}\u{903}"], &["\u{1}", "\u{308}", "\u{903}"]), ("\u{300}\u{903}",
2691 &["\u{300}\u{903}"], &["\u{300}", "\u{903}"]), ("\u{300}\u{308}\u{903}",
2692 &["\u{300}\u{308}\u{903}"], &["\u{300}\u{308}", "\u{903}"]), ("\u{903}\u{903}",
2693 &["\u{903}\u{903}"], &["\u{903}", "\u{903}"]), ("\u{903}\u{308}\u{903}",
2694 &["\u{903}\u{308}\u{903}"], &["\u{903}\u{308}", "\u{903}"]), ("\u{1100}\u{903}",
2695 &["\u{1100}\u{903}"], &["\u{1100}", "\u{903}"]), ("\u{1100}\u{308}\u{903}",
2696 &["\u{1100}\u{308}\u{903}"], &["\u{1100}\u{308}", "\u{903}"]), ("\u{1160}\u{903}",
2697 &["\u{1160}\u{903}"], &["\u{1160}", "\u{903}"]), ("\u{1160}\u{308}\u{903}",
2698 &["\u{1160}\u{308}\u{903}"], &["\u{1160}\u{308}", "\u{903}"]), ("\u{11A8}\u{903}",
2699 &["\u{11A8}\u{903}"], &["\u{11A8}", "\u{903}"]), ("\u{11A8}\u{308}\u{903}",
2700 &["\u{11A8}\u{308}\u{903}"], &["\u{11A8}\u{308}", "\u{903}"]), ("\u{AC00}\u{903}",
2701 &["\u{AC00}\u{903}"], &["\u{AC00}", "\u{903}"]), ("\u{AC00}\u{308}\u{903}",
2702 &["\u{AC00}\u{308}\u{903}"], &["\u{AC00}\u{308}", "\u{903}"]), ("\u{AC01}\u{903}",
2703 &["\u{AC01}\u{903}"], &["\u{AC01}", "\u{903}"]), ("\u{AC01}\u{308}\u{903}",
2704 &["\u{AC01}\u{308}\u{903}"], &["\u{AC01}\u{308}", "\u{903}"]), ("\u{1F1E6}\u{903}",
2705 &["\u{1F1E6}\u{903}"], &["\u{1F1E6}", "\u{903}"]), ("\u{1F1E6}\u{308}\u{903}",
2706 &["\u{1F1E6}\u{308}\u{903}"], &["\u{1F1E6}\u{308}", "\u{903}"]), ("\u{378}\u{903}",
2707 &["\u{378}\u{903}"], &["\u{378}", "\u{903}"]), ("\u{378}\u{308}\u{903}",
2708 &["\u{378}\u{308}\u{903}"], &["\u{378}\u{308}", "\u{903}"]),
// test_same: extended (true) and legacy (false) segmentation must yield the
// same clusters, iterated both forwards and (via rev()) backwards.
2711 for &(s, g) in test_same.iter() {
2712 // test forward iterator
2713 assert!(order::equals(s.graphemes(true), g.iter().map(|&x| x)));
2714 assert!(order::equals(s.graphemes(false), g.iter().map(|&x| x)));
2716 // test reverse iterator
2717 assert!(order::equals(s.graphemes(true).rev(), g.iter().rev().map(|&x| x)));
2718 assert!(order::equals(s.graphemes(false).rev(), g.iter().rev().map(|&x| x)));
// test_diff: graphemes(true) must match the first expected list (gt),
// graphemes(false) the second (gf), again in both directions.
2721 for &(s, gt, gf) in test_diff.iter() {
2722 // test forward iterator
2723 assert!(order::equals(s.graphemes(true), gt.iter().map(|&x| x)));
2724 assert!(order::equals(s.graphemes(false), gf.iter().map(|&x| x)));
2726 // test reverse iterator
2727 assert!(order::equals(s.graphemes(true).rev(), gt.iter().rev().map(|&x| x)));
2728 assert!(order::equals(s.graphemes(false).rev(), gf.iter().rev().map(|&x| x)));
2731 // test the indices iterators
// Each item is (byte offset, cluster). "\r\n" counts as a single cluster, so
// the offsets jump 0, 3, 6, 11 over the combining-mark clusters.
2732 let s = "a̐éö̲\r\n";
2733 let gr_inds = s.grapheme_indices(true).collect::<Vec<(uint, &str)>>();
2734 let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
2735 assert_eq!(gr_inds, b);
2736 let gr_inds = s.grapheme_indices(true).rev().collect::<Vec<(uint, &str)>>();
2737 let b: &[_] = &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0u, "a̐")];
2738 assert_eq!(gr_inds, b);
2739 let mut gr_inds_iter = s.grapheme_indices(true);
2741 let gr_inds = gr_inds_iter.by_ref();
// size_hint before iterating: lower bound 1 for a non-empty string; the upper
// bound 13 equals s.len() in bytes (3 + 3 + 5 + 2).
2742 let e1 = gr_inds.size_hint();
2743 assert_eq!(e1, (1, Some(13)));
2744 let c = gr_inds.count();
// After count() exhausts the iterator, the hint collapses to (0, Some(0)).
2747 let e2 = gr_inds_iter.size_hint();
2748 assert_eq!(e2, (0, Some(0)));
2750 // make sure the reverse iterator does the right thing with "\n" at beginning of string
2752 let gr = s.graphemes(true).rev().collect::<Vec<&str>>();
2753 let b: &[_] = &["\r", "\r\n", "\n"];
// Exercises split_str: splitting on a multi-character substring separator
// (as opposed to the char/predicate-based split).
2758 fn test_split_strator() {
// Collects the pieces of `s` split on `sep`; `u` is the expected result
// (the comparison assert is on a line not shown in this excerpt).
2759 fn t(s: &str, sep: &str, u: &[&str]) {
2760 let v: Vec<&str> = s.split_str(sep).collect();
2763 t("--1233345--", "12345", &["--1233345--"]);
2764 t("abc::hello::there", "::", &["abc", "hello", "there"]);
// Leading/trailing separators produce empty pieces at the ends.
2765 t("::hello::there", "::", &["", "hello", "there"]);
2766 t("hello::there::", "::", &["hello", "there", ""]);
2767 t("::hello::there::", "::", &["", "hello", "there", ""]);
2768 t("ประเทศไทย中华Việt Nam", "中华", &["ประเทศไทย", "Việt Nam"]);
2769 t("zzXXXzzYYYzz", "zz", &["", "XXX", "YYY", ""]);
2770 t("zzXXXzYYYz", "XXX", &["zz", "zYYYz"]);
2771 t(".XXX.YYY.", ".", &["", "XXX", "YYY", ""]);
// Separator equal to (or overlapping) the input: matches are non-overlapping
// and consumed left to right, leaving empty pieces and remainders.
2773 t("zz", "zz", &["",""]);
2774 t("ok", "z", &["ok"]);
2775 t("zzz", "zz", &["","z"]);
2776 t("zzzzz", "zz", &["","","z"]);
// Default::default() for any string-like type S must produce the empty string.
2780 fn test_str_default() {
2781 use core::default::Default;
// Generic helper instantiated per string type (instantiations elided here).
2782 fn t<S: Default + Str>() {
2783 let s: S = Default::default();
2784 assert_eq!(s.as_slice(), "");
2792 fn test_str_container() {
// sum_len: total byte length of all slices in v.
2793 fn sum_len(v: &[&str]) -> uint {
2794 v.iter().map(|x| x.len()).sum()
2797 let s = String::from_str("01234");
// Each slice set below covers the same 5 bytes, whether the pieces are
// borrowed literals or slices of freshly allocated Strings.
2798 assert_eq!(5, sum_len(&["012", "", "34"]));
2799 assert_eq!(5, sum_len(&[String::from_str("01").as_slice(),
2800 String::from_str("2").as_slice(),
2801 String::from_str("34").as_slice(),
2802 String::from_str("").as_slice()]));
2803 assert_eq!(5, sum_len(&[s.as_slice()]));
// from_utf8 round-trips valid ASCII and multi-byte UTF-8, and rejects bad bytes.
2807 fn test_str_from_utf8() {
// (the `let xs = ...` for this first assert is on a line not shown here)
2809 assert_eq!(from_utf8(xs), Ok("hello"));
2811 let xs = "ศไทย中华Việt Nam".as_bytes();
2812 assert_eq!(from_utf8(xs), Ok("ศไทย中华Việt Nam"));
// NOTE(review): 0xFF can never appear in well-formed UTF-8; this build reports
// it as Utf8Error::TooShort — confirm the variant against the current enum.
2814 let xs = b"hello\xFF";
2815 assert_eq!(from_utf8(xs), Err(Utf8Error::TooShort));
2822 use prelude::{SliceExt, IteratorExt, SliceConcatExt};
2824 use test::black_box;
// Microbenchmarks: forward and reverse char decoding over mixed-script text
// (multi-byte UTF-8) versus pure-ASCII text (single-byte chars).
2827 fn char_iterator(b: &mut Bencher) {
2828 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2830 b.iter(|| s.chars().count());
// Same traversal written as a for loop; black_box keeps each char from being
// optimized away.
2834 fn char_iterator_for(b: &mut Bencher) {
2835 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2838 for ch in s.chars() { black_box(ch) }
// Pure-ASCII input: every char is a single byte in UTF-8.
2843 fn char_iterator_ascii(b: &mut Bencher) {
2844 let s = "Mary had a little lamb, Little lamb
2845 Mary had a little lamb, Little lamb
2846 Mary had a little lamb, Little lamb
2847 Mary had a little lamb, Little lamb
2848 Mary had a little lamb, Little lamb
2849 Mary had a little lamb, Little lamb";
2851 b.iter(|| s.chars().count());
// Reverse decoding: chars() walked back-to-front.
2855 fn char_iterator_rev(b: &mut Bencher) {
2856 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2858 b.iter(|| s.chars().rev().count());
2862 fn char_iterator_rev_for(b: &mut Bencher) {
2863 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2866 for ch in s.chars().rev() { black_box(ch) }
// char_indices variants: same traversal but also yielding byte offsets; the
// count must equal the plain chars() count.
2871 fn char_indicesator(b: &mut Bencher) {
2872 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2873 let len = s.chars().count();
2875 b.iter(|| assert_eq!(s.char_indices().count(), len));
2879 fn char_indicesator_rev(b: &mut Bencher) {
2880 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2881 let len = s.chars().count();
2883 b.iter(|| assert_eq!(s.char_indices().rev().count(), len));
// Split benchmarks: the same logical split expressed through different pattern
// kinds (plain char, custom CharEq, fn pointer, closure, char slice) so the
// dispatch/matching costs can be compared.
2887 fn split_unicode_ascii(b: &mut Bencher) {
2888 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2890 b.iter(|| assert_eq!(s.split('V').count(), 3));
// NotAscii wraps a char but reports only_ascii() == false.
// NOTE(review): presumably this opts the matcher out of its ASCII fast path —
// confirm against the CharEq trait's contract.
2894 fn split_unicode_not_ascii(b: &mut Bencher) {
2895 struct NotAscii(char);
2896 impl CharEq for NotAscii {
2897 fn matches(&mut self, c: char) -> bool {
2898 let NotAscii(cc) = *self;
2901 fn only_ascii(&self) -> bool { false }
2903 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2905 b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
// ASCII haystack split on a plain char.
2910 fn split_ascii(b: &mut Bencher) {
2911 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2912 let len = s.split(' ').count();
2914 b.iter(|| assert_eq!(s.split(' ').count(), len));
// ASCII haystack, but through the non-ASCII CharEq wrapper.
2918 fn split_not_ascii(b: &mut Bencher) {
2919 struct NotAscii(char);
2920 impl CharEq for NotAscii {
2922 fn matches(&mut self, c: char) -> bool {
2923 let NotAscii(cc) = *self;
2926 fn only_ascii(&self) -> bool { false }
2928 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2929 let len = s.split(' ').count();
2931 b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
// Predicate given as a named fn (fn pointer) ...
2935 fn split_extern_fn(b: &mut Bencher) {
2936 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2937 let len = s.split(' ').count();
2938 fn pred(c: char) -> bool { c == ' ' }
2940 b.iter(|| assert_eq!(s.split(pred).count(), len));
// ... as an inline closure ...
2944 fn split_closure(b: &mut Bencher) {
2945 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2946 let len = s.split(' ').count();
2948 b.iter(|| assert_eq!(s.split(|&: c: char| c == ' ').count(), len));
// ... and as a slice of candidate chars.
2952 fn split_slice(b: &mut Bencher) {
2953 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2954 let len = s.split(' ').count();
2956 let c: &[char] = &[' '];
2957 b.iter(|| assert_eq!(s.split(c).count(), len));
// Joining 10 copies of s with a separator: the result length must be exactly
// 10 strings plus 9 separators.
2961 fn bench_connect(b: &mut Bencher) {
2962 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2964 let v = vec![s, s, s, s, s, s, s, s, s, s];
2966 assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
// Substring search: short needle in a short haystack (needle binding elided
// from this excerpt).
2971 fn bench_contains_short_short(b: &mut Bencher) {
2972 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2976 assert!(haystack.contains(needle));
2981 fn bench_contains_short_long(b: &mut Bencher) {
2983 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
2984 ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
2985 eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
2986 sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
2987 tempus vel, gravida nec quam.
2989 In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
2990 sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
2991 diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
2992 lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
2993 eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
2994 interdum. Curabitur ut nisi justo.
2996 Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
2997 mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
2998 lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
2999 est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
3000 felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
3001 ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
3002 feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
3003 Aliquam sit amet placerat lorem.
3005 Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
3006 mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
3007 Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
3008 lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
3009 suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
3012 Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
3013 feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
3014 vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
3015 leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
3016 malesuada sollicitudin quam eu fermentum.";
3017 let needle = "english";
3020 assert!(!haystack.contains(needle));
// Adversarial input for substring search: a long run of 'a' with a needle that
// almost matches at every position — a naive O(n*m) search degrades badly here.
3025 fn bench_contains_bad_naive(b: &mut Bencher) {
3026 let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
3027 let needle = "aaaaaaaab";
3030 assert!(!haystack.contains(needle));
// Needle identical to the haystack: the match must span the entire string.
3035 fn bench_contains_equal(b: &mut Bencher) {
3036 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3037 let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3040 assert!(haystack.contains(needle));