1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 // ignore-lexer-test FIXME #15679
13 //! Unicode string manipulation (`str` type)
17 //! Rust's string type is one of the core primitive types of the language. While
18 //! represented by the name `str`, the name `str` is not actually a valid type in
19 //! Rust. Each string must also be decorated with a pointer. `String` is used
20 //! for an owned string, so there is only one commonly-used `str` type in Rust:
23 //! `&str` is the borrowed string type. This type of string can only be created
24 //! from other strings, unless it is a static string (see below). As the word
25 //! "borrowed" implies, this type of string is owned elsewhere, and this string
26 //! cannot be moved out of.
28 //! As an example, here's some code that uses a string.
32 //! let borrowed_string = "This string is borrowed with the 'static lifetime";
36 //! From the example above, you can guess that Rust's string literals have the
37 //! `'static` lifetime. This is akin to C's concept of a static string.
38 //! More precisely, string literals are immutable views with a 'static lifetime
39 //! (otherwise known as the lifetime of the entire program), and thus have the
40 //! type `&'static str`.
44 //! Rust's string type, `str`, is a sequence of Unicode scalar values encoded as a
45 //! stream of UTF-8 bytes. All [strings](../../reference.html#literals) are
46 //! guaranteed to be validly encoded UTF-8 sequences. Additionally, strings are
47 //! not null-terminated and can thus contain null bytes.
//! The actual representation of strings has a direct mapping to slices: `&str`
50 //! is the same as `&[u8]`.
52 #![doc(primitive = "str")]
54 use self::RecompositionState::*;
55 use self::DecompositionType::*;
57 use core::borrow::{BorrowFrom, ToOwned};
59 use core::clone::Clone;
60 use core::iter::AdditiveIterator;
61 use core::iter::{range, Iterator, IteratorExt};
62 use core::kinds::Sized;
64 use core::option::Option::{self, Some, None};
65 use core::slice::AsSlice;
66 use core::str as core_str;
67 use unicode::str::{UnicodeStr, Utf16Encoder};
69 use ring_buf::RingBuf;
74 use slice::SliceConcatExt;
76 pub use core::str::{FromStr, Utf8Error, Str};
77 pub use core::str::{Lines, LinesAny, MatchIndices, SplitStr, CharRange};
78 pub use core::str::{Split, SplitTerminator};
79 pub use core::str::{SplitN, RSplitN};
80 pub use core::str::{from_utf8, CharEq, Chars, CharIndices, Bytes};
81 pub use core::str::{from_utf8_unchecked, from_c_str};
82 pub use unicode::str::{Words, Graphemes, GraphemeIndices};
85 Section: Creating a string
88 impl<S: Str> SliceConcatExt<str, String> for [S] {
89 fn concat(&self) -> String {
90 let s = self.as_slice();
96 // `len` calculation may overflow but push_str will check boundaries
97 let len = s.iter().map(|s| s.as_slice().len()).sum();
98 let mut result = String::with_capacity(len);
101 result.push_str(s.as_slice())
107 fn connect(&self, sep: &str) -> String {
108 let s = self.as_slice();
111 return String::new();
119 // this is wrong without the guarantee that `self` is non-empty
// `len` calculation may overflow but push_str will check boundaries
121 let len = sep.len() * (s.len() - 1)
122 + s.iter().map(|s| s.as_slice().len()).sum();
123 let mut result = String::with_capacity(len);
124 let mut first = true;
130 result.push_str(sep);
132 result.push_str(s.as_slice());
142 // Helper functions used for Unicode normalization
143 fn canonical_sort(comb: &mut [(char, u8)]) {
144 let len = comb.len();
145 for i in range(0, len) {
146 let mut swapped = false;
147 for j in range(1, len-i) {
148 let class_a = comb[j-1].1;
149 let class_b = comb[j].1;
150 if class_a != 0 && class_b != 0 && class_a > class_b {
155 if !swapped { break; }
160 enum DecompositionType {
165 /// External iterator for a string's decomposition's characters.
166 /// Use with the `std::iter` module.
168 pub struct Decompositions<'a> {
169 kind: DecompositionType,
171 buffer: Vec<(char, u8)>,
175 impl<'a> Iterator for Decompositions<'a> {
179 fn next(&mut self) -> Option<char> {
180 match self.buffer.first() {
183 self.buffer.remove(0);
186 Some(&(c, _)) if self.sorted => {
187 self.buffer.remove(0);
190 _ => self.sorted = false
194 for ch in self.iter {
195 let buffer = &mut self.buffer;
196 let sorted = &mut self.sorted;
198 let callback = |&mut: d| {
200 unicode::char::canonical_combining_class(d);
201 if class == 0 && !*sorted {
202 canonical_sort(buffer.as_mut_slice());
205 buffer.push((d, class));
209 unicode::char::decompose_canonical(ch, callback)
212 unicode::char::decompose_compatible(ch, callback)
223 canonical_sort(self.buffer.as_mut_slice());
227 if self.buffer.is_empty() {
230 match self.buffer.remove(0) {
240 fn size_hint(&self) -> (uint, Option<uint>) {
241 let (lower, _) = self.iter.size_hint();
247 enum RecompositionState {
253 /// External iterator for a string's recomposition's characters.
254 /// Use with the `std::iter` module.
256 pub struct Recompositions<'a> {
257 iter: Decompositions<'a>,
258 state: RecompositionState,
259 buffer: RingBuf<char>,
260 composee: Option<char>,
264 impl<'a> Iterator for Recompositions<'a> {
268 fn next(&mut self) -> Option<char> {
272 for ch in self.iter {
273 let ch_class = unicode::char::canonical_combining_class(ch);
274 if self.composee.is_none() {
278 self.composee = Some(ch);
281 let k = self.composee.clone().unwrap();
283 match self.last_ccc {
285 match unicode::char::compose(k, ch) {
287 self.composee = Some(r);
292 self.composee = Some(ch);
295 self.buffer.push_back(ch);
296 self.last_ccc = Some(ch_class);
301 if l_class >= ch_class {
302 // `ch` is blocked from `composee`
304 self.composee = Some(ch);
305 self.last_ccc = None;
306 self.state = Purging;
309 self.buffer.push_back(ch);
310 self.last_ccc = Some(ch_class);
313 match unicode::char::compose(k, ch) {
315 self.composee = Some(r);
319 self.buffer.push_back(ch);
320 self.last_ccc = Some(ch_class);
326 self.state = Finished;
327 if self.composee.is_some() {
328 return self.composee.take();
332 match self.buffer.pop_front() {
333 None => self.state = Composing,
338 match self.buffer.pop_front() {
339 None => return self.composee.take(),
348 /// External iterator for a string's UTF16 codeunits.
349 /// Use with the `std::iter` module.
351 pub struct Utf16Units<'a> {
352 encoder: Utf16Encoder<Chars<'a>>
355 impl<'a> Iterator for Utf16Units<'a> {
359 fn next(&mut self) -> Option<u16> { self.encoder.next() }
362 fn size_hint(&self) -> (uint, Option<uint>) { self.encoder.size_hint() }
369 // Return the initial codepoint accumulator for the first byte.
370 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
371 // for width 3, and 3 bits for width 4
372 macro_rules! utf8_first_byte {
373 ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
376 // return the value of $ch updated with continuation byte $byte
377 macro_rules! utf8_acc_cont_byte {
378 ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
381 #[unstable = "trait is unstable"]
382 impl BorrowFrom<String> for str {
383 fn borrow_from(owned: &String) -> &str { owned[] }
386 #[unstable = "trait is unstable"]
387 impl ToOwned<String> for str {
388 fn to_owned(&self) -> String {
390 String::from_utf8_unchecked(self.as_bytes().to_owned())
400 Section: Trait implementations
403 /// Any string that can be represented as a slice.
404 pub trait StrExt for Sized?: ops::Slice<uint, str> {
405 /// Escapes each char in `s` with `char::escape_default`.
406 #[unstable = "return type may change to be an iterator"]
407 fn escape_default(&self) -> String {
408 self.chars().flat_map(|c| c.escape_default()).collect()
411 /// Escapes each char in `s` with `char::escape_unicode`.
412 #[unstable = "return type may change to be an iterator"]
413 fn escape_unicode(&self) -> String {
414 self.chars().flat_map(|c| c.escape_unicode()).collect()
417 /// Replaces all occurrences of one string with another.
421 /// * `from` - The string to replace
422 /// * `to` - The replacement string
426 /// The original string with all occurrences of `from` replaced with `to`.
431 /// let s = "Do you know the muffin man,
432 /// The muffin man, the muffin man, ...".to_string();
434 /// assert_eq!(s.replace("muffin man", "little lamb"),
435 /// "Do you know the little lamb,
436 /// The little lamb, the little lamb, ...".to_string());
438 /// // not found, so no change.
439 /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
442 fn replace(&self, from: &str, to: &str) -> String {
443 let mut result = String::new();
444 let mut last_end = 0;
445 for (start, end) in self.match_indices(from) {
446 result.push_str(unsafe { self.slice_unchecked(last_end, start) });
450 result.push_str(unsafe { self.slice_unchecked(last_end, self.len()) });
454 /// Returns an iterator over the string in Unicode Normalization Form D
455 /// (canonical decomposition).
457 #[unstable = "this functionality may be moved to libunicode"]
458 fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
460 iter: self[].chars(),
467 /// Returns an iterator over the string in Unicode Normalization Form KD
468 /// (compatibility decomposition).
470 #[unstable = "this functionality may be moved to libunicode"]
471 fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
473 iter: self[].chars(),
480 /// An Iterator over the string in Unicode Normalization Form C
481 /// (canonical decomposition followed by canonical composition).
483 #[unstable = "this functionality may be moved to libunicode"]
484 fn nfc_chars<'a>(&'a self) -> Recompositions<'a> {
486 iter: self.nfd_chars(),
488 buffer: RingBuf::new(),
494 /// An Iterator over the string in Unicode Normalization Form KC
495 /// (compatibility decomposition followed by canonical composition).
497 #[unstable = "this functionality may be moved to libunicode"]
498 fn nfkc_chars<'a>(&'a self) -> Recompositions<'a> {
500 iter: self.nfkd_chars(),
502 buffer: RingBuf::new(),
508 /// Returns true if a string contains a string pattern.
512 /// - pat - The string pattern to look for
517 /// assert!("bananas".contains("nana"));
520 fn contains(&self, pat: &str) -> bool {
521 core_str::StrExt::contains(self[], pat)
524 /// Returns true if a string contains a char pattern.
528 /// - pat - The char pattern to look for
533 /// assert!("hello".contains_char('e'));
535 #[unstable = "might get removed in favour of a more generic contains()"]
536 fn contains_char<P: CharEq>(&self, pat: P) -> bool {
537 core_str::StrExt::contains_char(self[], pat)
540 /// An iterator over the characters of `self`. Note, this iterates
541 /// over Unicode code-points, not Unicode graphemes.
546 /// let v: Vec<char> = "abc åäö".chars().collect();
547 /// assert_eq!(v, vec!['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
550 fn chars(&self) -> Chars {
551 core_str::StrExt::chars(self[])
554 /// An iterator over the bytes of `self`
559 /// let v: Vec<u8> = "bors".bytes().collect();
560 /// assert_eq!(v, b"bors".to_vec());
563 fn bytes(&self) -> Bytes {
564 core_str::StrExt::bytes(self[])
567 /// An iterator over the characters of `self` and their byte offsets.
569 fn char_indices(&self) -> CharIndices {
570 core_str::StrExt::char_indices(self[])
573 /// An iterator over substrings of `self`, separated by characters
574 /// matched by the pattern `pat`.
579 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect();
580 /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
582 /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).collect();
583 /// assert_eq!(v, vec!["abc", "def", "ghi"]);
585 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
586 /// assert_eq!(v, vec!["lion", "", "tiger", "leopard"]);
588 /// let v: Vec<&str> = "".split('X').collect();
589 /// assert_eq!(v, vec![""]);
592 fn split<P: CharEq>(&self, pat: P) -> Split<P> {
593 core_str::StrExt::split(self[], pat)
596 /// An iterator over substrings of `self`, separated by characters
597 /// matched by the pattern `pat`, restricted to splitting at most `count`
603 /// let v: Vec<&str> = "Mary had a little lambda".splitn(2, ' ').collect();
604 /// assert_eq!(v, vec!["Mary", "had", "a little lambda"]);
606 /// let v: Vec<&str> = "abc1def2ghi".splitn(1, |&: c: char| c.is_numeric()).collect();
607 /// assert_eq!(v, vec!["abc", "def2ghi"]);
609 /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(2, 'X').collect();
610 /// assert_eq!(v, vec!["lion", "", "tigerXleopard"]);
612 /// let v: Vec<&str> = "abcXdef".splitn(0, 'X').collect();
613 /// assert_eq!(v, vec!["abcXdef"]);
615 /// let v: Vec<&str> = "".splitn(1, 'X').collect();
616 /// assert_eq!(v, vec![""]);
619 fn splitn<P: CharEq>(&self, count: uint, pat: P) -> SplitN<P> {
620 core_str::StrExt::splitn(self[], count, pat)
623 /// An iterator over substrings of `self`, separated by characters
624 /// matched by the pattern `pat`.
626 /// Equivalent to `split`, except that the trailing substring
627 /// is skipped if empty (terminator semantics).
632 /// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
633 /// assert_eq!(v, vec!["A", "B"]);
635 /// let v: Vec<&str> = "A..B..".split_terminator('.').collect();
636 /// assert_eq!(v, vec!["A", "", "B", ""]);
638 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').rev().collect();
639 /// assert_eq!(v, vec!["lamb", "little", "a", "had", "Mary"]);
641 /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).rev().collect();
642 /// assert_eq!(v, vec!["ghi", "def", "abc"]);
644 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').rev().collect();
645 /// assert_eq!(v, vec!["leopard", "tiger", "", "lion"]);
647 #[unstable = "might get removed"]
648 fn split_terminator<P: CharEq>(&self, pat: P) -> SplitTerminator<P> {
649 core_str::StrExt::split_terminator(self[], pat)
652 /// An iterator over substrings of `self`, separated by characters
653 /// matched by the pattern `pat`, starting from the end of the string.
654 /// Restricted to splitting at most `count` times.
659 /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(2, ' ').collect();
660 /// assert_eq!(v, vec!["lamb", "little", "Mary had a"]);
662 /// let v: Vec<&str> = "abc1def2ghi".rsplitn(1, |&: c: char| c.is_numeric()).collect();
663 /// assert_eq!(v, vec!["ghi", "abc1def"]);
665 /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(2, 'X').collect();
666 /// assert_eq!(v, vec!["leopard", "tiger", "lionX"]);
669 fn rsplitn<P: CharEq>(&self, count: uint, pat: P) -> RSplitN<P> {
670 core_str::StrExt::rsplitn(self[], count, pat)
673 /// An iterator over the start and end indices of the disjoint
674 /// matches of the pattern `pat` within `self`.
676 /// That is, each returned value `(start, end)` satisfies
/// `self.slice(start, end) == pat`. For matches of `pat` within
678 /// `self` that overlap, only the indices corresponding to the
679 /// first match are returned.
684 /// let v: Vec<(uint, uint)> = "abcXXXabcYYYabc".match_indices("abc").collect();
685 /// assert_eq!(v, vec![(0,3), (6,9), (12,15)]);
687 /// let v: Vec<(uint, uint)> = "1abcabc2".match_indices("abc").collect();
688 /// assert_eq!(v, vec![(1,4), (4,7)]);
690 /// let v: Vec<(uint, uint)> = "ababa".match_indices("aba").collect();
691 /// assert_eq!(v, vec![(0, 3)]); // only the first `aba`
693 #[unstable = "might have its iterator type changed"]
694 fn match_indices<'a>(&'a self, pat: &'a str) -> MatchIndices<'a> {
695 core_str::StrExt::match_indices(self[], pat)
/// An iterator over the substrings of `self` separated by the pattern `pat`.
703 /// let v: Vec<&str> = "abcXXXabcYYYabc".split_str("abc").collect();
704 /// assert_eq!(v, vec!["", "XXX", "YYY", ""]);
706 /// let v: Vec<&str> = "1abcabc2".split_str("abc").collect();
707 /// assert_eq!(v, vec!["1", "", "2"]);
709 #[unstable = "might get removed in the future in favor of a more generic split()"]
710 fn split_str<'a>(&'a self, pat: &'a str) -> SplitStr<'a> {
711 core_str::StrExt::split_str(self[], pat)
714 /// An iterator over the lines of a string (subsequences separated
715 /// by `\n`). This does not include the empty string after a
721 /// let four_lines = "foo\nbar\n\nbaz\n";
722 /// let v: Vec<&str> = four_lines.lines().collect();
723 /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
726 fn lines(&self) -> Lines {
727 core_str::StrExt::lines(self[])
730 /// An iterator over the lines of a string, separated by either
731 /// `\n` or `\r\n`. As with `.lines()`, this does not include an
732 /// empty trailing line.
737 /// let four_lines = "foo\r\nbar\n\r\nbaz\n";
738 /// let v: Vec<&str> = four_lines.lines_any().collect();
739 /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
742 fn lines_any(&self) -> LinesAny {
743 core_str::StrExt::lines_any(self[])
746 /// Returns a slice of the given string from the byte range
747 /// [`begin`..`end`).
749 /// This operation is `O(1)`.
751 /// Panics when `begin` and `end` do not point to valid characters
752 /// or point beyond the last character of the string.
754 /// See also `slice_to` and `slice_from` for slicing prefixes and
755 /// suffixes of strings, and `slice_chars` for slicing based on
756 /// code point counts.
761 /// let s = "Löwe 老虎 Léopard";
762 /// assert_eq!(s.slice(0, 1), "L");
764 /// assert_eq!(s.slice(1, 9), "öwe 老");
766 /// // these will panic:
767 /// // byte 2 lies within `ö`:
768 /// // s.slice(2, 3);
770 /// // byte 8 lies within `老`
771 /// // s.slice(1, 8);
773 /// // byte 100 is outside the string
774 /// // s.slice(3, 100);
776 #[unstable = "use slice notation [a..b] instead"]
777 fn slice(&self, begin: uint, end: uint) -> &str {
778 core_str::StrExt::slice(self[], begin, end)
781 /// Returns a slice of the string from `begin` to its end.
783 /// Equivalent to `self.slice(begin, self.len())`.
785 /// Panics when `begin` does not point to a valid character, or is
788 /// See also `slice`, `slice_to` and `slice_chars`.
789 #[unstable = "use slice notation [a..] instead"]
790 fn slice_from(&self, begin: uint) -> &str {
791 core_str::StrExt::slice_from(self[], begin)
794 /// Returns a slice of the string from the beginning to byte
797 /// Equivalent to `self.slice(0, end)`.
799 /// Panics when `end` does not point to a valid character, or is
802 /// See also `slice`, `slice_from` and `slice_chars`.
803 #[unstable = "use slice notation [0..a] instead"]
804 fn slice_to(&self, end: uint) -> &str {
805 core_str::StrExt::slice_to(self[], end)
808 /// Returns a slice of the string from the character range
809 /// [`begin`..`end`).
811 /// That is, start at the `begin`-th code point of the string and
812 /// continue to the `end`-th code point. This does not detect or
813 /// handle edge cases such as leaving a combining character as the
814 /// first code point of the string.
816 /// Due to the design of UTF-8, this operation is `O(end)`.
817 /// See `slice`, `slice_to` and `slice_from` for `O(1)`
818 /// variants that use byte indices rather than code point
/// Panics if `begin` > `end` or if either `begin` or `end` is
822 /// beyond the last character of the string.
827 /// let s = "Löwe 老虎 Léopard";
828 /// assert_eq!(s.slice_chars(0, 4), "Löwe");
829 /// assert_eq!(s.slice_chars(5, 7), "老虎");
831 #[unstable = "may have yet to prove its worth"]
832 fn slice_chars(&self, begin: uint, end: uint) -> &str {
833 core_str::StrExt::slice_chars(self[], begin, end)
836 /// Takes a bytewise (not UTF-8) slice from a string.
838 /// Returns the substring from [`begin`..`end`).
840 /// Caller must check both UTF-8 character boundaries and the boundaries of
841 /// the entire slice as well.
843 unsafe fn slice_unchecked(&self, begin: uint, end: uint) -> &str {
844 core_str::StrExt::slice_unchecked(self[], begin, end)
847 /// Returns true if the pattern `pat` is a prefix of the string.
852 /// assert!("banana".starts_with("ba"));
855 fn starts_with(&self, pat: &str) -> bool {
856 core_str::StrExt::starts_with(self[], pat)
859 /// Returns true if the pattern `pat` is a suffix of the string.
864 /// assert!("banana".ends_with("nana"));
867 fn ends_with(&self, pat: &str) -> bool {
868 core_str::StrExt::ends_with(self[], pat)
871 /// Returns a string with all pre- and suffixes that match
872 /// the pattern `pat` repeatedly removed.
876 /// * pat - a string pattern
881 /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
882 /// let x: &[_] = &['1', '2'];
883 /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar");
884 /// assert_eq!("123foo1bar123".trim_matches(|&: c: char| c.is_numeric()), "foo1bar");
887 fn trim_matches<P: CharEq>(&self, pat: P) -> &str {
888 core_str::StrExt::trim_matches(self[], pat)
891 /// Returns a string with all prefixes that match
892 /// the pattern `pat` repeatedly removed.
896 /// * pat - a string pattern
901 /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
902 /// let x: &[_] = &['1', '2'];
903 /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12");
904 /// assert_eq!("123foo1bar123".trim_left_matches(|&: c: char| c.is_numeric()), "foo1bar123");
907 fn trim_left_matches<P: CharEq>(&self, pat: P) -> &str {
908 core_str::StrExt::trim_left_matches(self[], pat)
911 /// Returns a string with all suffixes that match
912 /// the pattern `pat` repeatedly removed.
916 /// * pat - a string pattern
921 /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
922 /// let x: &[_] = &['1', '2'];
923 /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar");
924 /// assert_eq!("123foo1bar123".trim_right_matches(|&: c: char| c.is_numeric()), "123foo1bar");
927 fn trim_right_matches<P: CharEq>(&self, pat: P) -> &str {
928 core_str::StrExt::trim_right_matches(self[], pat)
931 /// Check that `index`-th byte lies at the start and/or end of a
932 /// UTF-8 code point sequence.
934 /// The start and end of the string (when `index == self.len()`)
935 /// are considered to be boundaries.
937 /// Panics if `index` is greater than `self.len()`.
942 /// let s = "Löwe 老虎 Léopard";
943 /// assert!(s.is_char_boundary(0));
945 /// assert!(s.is_char_boundary(6));
946 /// assert!(s.is_char_boundary(s.len()));
948 /// // second byte of `ö`
949 /// assert!(!s.is_char_boundary(2));
951 /// // third byte of `老`
952 /// assert!(!s.is_char_boundary(8));
954 #[unstable = "naming is uncertain with container conventions"]
955 fn is_char_boundary(&self, index: uint) -> bool {
956 core_str::StrExt::is_char_boundary(self[], index)
959 /// Pluck a character out of a string and return the index of the next
962 /// This function can be used to iterate over the Unicode characters of a
967 /// This example manually iterates through the characters of a
968 /// string; this should normally be done by `.chars()` or
972 /// use std::str::CharRange;
974 /// let s = "中华Việt Nam";
976 /// while i < s.len() {
977 /// let CharRange {ch, next} = s.char_range_at(i);
978 /// println!("{}: {}", i, ch);
1000 /// * s - The string
1001 /// * i - The byte offset of the char to extract
1005 /// A record {ch: char, next: uint} containing the char value and the byte
1006 /// index of the next Unicode character.
1010 /// If `i` is greater than or equal to the length of the string.
1011 /// If `i` is not the index of the beginning of a valid UTF-8 character.
1012 #[unstable = "naming is uncertain with container conventions"]
1013 fn char_range_at(&self, start: uint) -> CharRange {
1014 core_str::StrExt::char_range_at(self[], start)
1017 /// Given a byte position and a str, return the previous char and its position.
1019 /// This function can be used to iterate over a Unicode string in reverse.
1021 /// Returns 0 for next index if called on start index 0.
1025 /// If `i` is greater than the length of the string.
1026 /// If `i` is not an index following a valid UTF-8 character.
1027 #[unstable = "naming is uncertain with container conventions"]
1028 fn char_range_at_reverse(&self, start: uint) -> CharRange {
1029 core_str::StrExt::char_range_at_reverse(self[], start)
1032 /// Plucks the character starting at the `i`th byte of a string.
1038 /// assert_eq!(s.char_at(1), 'b');
1039 /// assert_eq!(s.char_at(2), 'π');
1040 /// assert_eq!(s.char_at(4), 'c');
1045 /// If `i` is greater than or equal to the length of the string.
1046 /// If `i` is not the index of the beginning of a valid UTF-8 character.
1047 #[unstable = "naming is uncertain with container conventions"]
1048 fn char_at(&self, i: uint) -> char {
1049 core_str::StrExt::char_at(self[], i)
1052 /// Plucks the character ending at the `i`th byte of a string.
1056 /// If `i` is greater than the length of the string.
1057 /// If `i` is not an index following a valid UTF-8 character.
1058 #[unstable = "naming is uncertain with container conventions"]
1059 fn char_at_reverse(&self, i: uint) -> char {
1060 core_str::StrExt::char_at_reverse(self[], i)
1063 /// Work with the byte buffer of a string as a byte slice.
1068 /// assert_eq!("bors".as_bytes(), b"bors");
1071 fn as_bytes(&self) -> &[u8] {
1072 core_str::StrExt::as_bytes(self[])
1075 /// Returns the byte index of the first character of `self` that
1076 /// matches the pattern `pat`.
/// `Some` containing the byte index of the first matching character
1081 /// or `None` if there is no match
1086 /// let s = "Löwe 老虎 Léopard";
1088 /// assert_eq!(s.find('L'), Some(0));
1089 /// assert_eq!(s.find('é'), Some(14));
1091 /// // the first space
1092 /// assert_eq!(s.find(|&: c: char| c.is_whitespace()), Some(5));
1094 /// // neither are found
1095 /// let x: &[_] = &['1', '2'];
1096 /// assert_eq!(s.find(x), None);
1099 fn find<P: CharEq>(&self, pat: P) -> Option<uint> {
1100 core_str::StrExt::find(self[], pat)
1103 /// Returns the byte index of the last character of `self` that
1104 /// matches the pattern `pat`.
1108 /// `Some` containing the byte index of the last matching character
1109 /// or `None` if there is no match.
1114 /// let s = "Löwe 老虎 Léopard";
1116 /// assert_eq!(s.rfind('L'), Some(13));
1117 /// assert_eq!(s.rfind('é'), Some(14));
1119 /// // the second space
1120 /// assert_eq!(s.rfind(|&: c: char| c.is_whitespace()), Some(12));
1122 /// // searches for an occurrence of either `1` or `2`, but neither are found
1123 /// let x: &[_] = &['1', '2'];
1124 /// assert_eq!(s.rfind(x), None);
1127 fn rfind<P: CharEq>(&self, pat: P) -> Option<uint> {
1128 core_str::StrExt::rfind(self[], pat)
1131 /// Returns the byte index of the first matching substring
1135 /// * `needle` - The string to search for
1139 /// `Some` containing the byte index of the first matching substring
1140 /// or `None` if there is no match.
1145 /// let s = "Löwe 老虎 Léopard";
1147 /// assert_eq!(s.find_str("老虎 L"), Some(6));
1148 /// assert_eq!(s.find_str("muffin man"), None);
1150 #[unstable = "might get removed in favor of a more generic find in the future"]
1151 fn find_str(&self, needle: &str) -> Option<uint> {
1152 core_str::StrExt::find_str(self[], needle)
1155 /// Retrieves the first character from a string slice and returns
1156 /// it. This does not allocate a new string; instead, it returns a
/// slice that points one character beyond the character that was
1158 /// shifted. If the string does not contain any characters,
1159 /// None is returned instead.
1164 /// let s = "Löwe 老虎 Léopard";
1165 /// let (c, s1) = s.slice_shift_char().unwrap();
1166 /// assert_eq!(c, 'L');
1167 /// assert_eq!(s1, "öwe 老虎 Léopard");
1169 /// let (c, s2) = s1.slice_shift_char().unwrap();
1170 /// assert_eq!(c, 'ö');
1171 /// assert_eq!(s2, "we 老虎 Léopard");
1173 #[unstable = "awaiting conventions about shifting and slices"]
1174 fn slice_shift_char(&self) -> Option<(char, &str)> {
1175 core_str::StrExt::slice_shift_char(self[])
1178 /// Returns the byte offset of an inner slice relative to an enclosing outer slice.
1180 /// Panics if `inner` is not a direct slice contained within self.
1185 /// let string = "a\nb\nc";
1186 /// let lines: Vec<&str> = string.lines().collect();
1188 /// assert!(string.subslice_offset(lines[0]) == 0); // &"a"
1189 /// assert!(string.subslice_offset(lines[1]) == 2); // &"b"
1190 /// assert!(string.subslice_offset(lines[2]) == 4); // &"c"
1192 #[unstable = "awaiting convention about comparability of arbitrary slices"]
1193 fn subslice_offset(&self, inner: &str) -> uint {
1194 core_str::StrExt::subslice_offset(self[], inner)
/// Return an unsafe pointer to the string's buffer.
1199 /// The caller must ensure that the string outlives this pointer,
1200 /// and that it is not reallocated (e.g. by pushing to the
1204 fn as_ptr(&self) -> *const u8 {
1205 core_str::StrExt::as_ptr(self[])
1208 /// Return an iterator of `u16` over the string encoded as UTF-16.
1209 #[unstable = "this functionality may only be provided by libunicode"]
1210 fn utf16_units(&self) -> Utf16Units {
1211 Utf16Units { encoder: Utf16Encoder::new(self[].chars()) }
1214 /// Return the number of bytes in this string
1219 /// assert_eq!("foo".len(), 3);
1220 /// assert_eq!("ƒoo".len(), 4);
1224 fn len(&self) -> uint {
1225 core_str::StrExt::len(self[])
1228 /// Returns true if this slice contains no bytes
1233 /// assert!("".is_empty());
1237 fn is_empty(&self) -> bool {
1238 core_str::StrExt::is_empty(self[])
1241 /// Parse this string into the specified type.
1246 /// assert_eq!("4".parse::<u32>(), Some(4));
1247 /// assert_eq!("j".parse::<u32>(), None);
1250 #[unstable = "this method was just created"]
1251 fn parse<F: FromStr>(&self) -> Option<F> {
1252 core_str::StrExt::parse(self[])
1255 /// Returns an iterator over the
1256 /// [grapheme clusters](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
1259 /// If `is_extended` is true, the iterator is over the *extended grapheme clusters*;
1260 /// otherwise, the iterator is over the *legacy grapheme clusters*.
1261 /// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
1262 /// recommends extended grapheme cluster boundaries for general processing.
1267 /// let gr1 = "a\u{310}e\u{301}o\u{308}\u{332}".graphemes(true).collect::<Vec<&str>>();
1268 /// let b: &[_] = &["a\u{310}", "e\u{301}", "o\u{308}\u{332}"];
1269 /// assert_eq!(gr1.as_slice(), b);
1270 /// let gr2 = "a\r\nb🇷🇺🇸🇹".graphemes(true).collect::<Vec<&str>>();
1271 /// let b: &[_] = &["a", "\r\n", "b", "🇷🇺🇸🇹"];
1272 /// assert_eq!(gr2.as_slice(), b);
1274 #[unstable = "this functionality may only be provided by libunicode"]
1275 fn graphemes(&self, is_extended: bool) -> Graphemes {
1276 UnicodeStr::graphemes(self[], is_extended)
1279 /// Returns an iterator over the grapheme clusters of self and their byte offsets.
1280 /// See `graphemes()` method for more information.
1285 /// let gr_inds = "a̐éö̲\r\n".grapheme_indices(true).collect::<Vec<(uint, &str)>>();
1286 /// let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
1287 /// assert_eq!(gr_inds.as_slice(), b);
1289 #[unstable = "this functionality may only be provided by libunicode"]
1290 fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices {
1291 UnicodeStr::grapheme_indices(self[], is_extended)
1294 /// An iterator over the words of a string (subsequences separated
1295 /// by any sequence of whitespace). Sequences of whitespace are
1296 /// collapsed, so empty "words" are not included.
1301 /// let some_words = " Mary had\ta little \n\t lamb";
1302 /// let v: Vec<&str> = some_words.words().collect();
1303 /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
1306 fn words(&self) -> Words {
1307 UnicodeStr::words(self[])
1310 /// Returns a string's displayed width in columns, treating control
1311 /// characters as zero-width.
1313 /// `is_cjk` determines behavior for characters in the Ambiguous category:
1314 /// if `is_cjk` is `true`, these are 2 columns wide; otherwise, they are 1.
1315 /// In CJK locales, `is_cjk` should be `true`, else it should be `false`.
1316 /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
1317 /// recommends that these characters be treated as 1 column (i.e.,
1318 /// `is_cjk` = `false`) if the locale is unknown.
1319 #[unstable = "this functionality may only be provided by libunicode"]
1320 fn width(&self, is_cjk: bool) -> uint {
1321 UnicodeStr::width(self[], is_cjk)
1324 /// Returns a string with leading and trailing whitespace removed.
1326 fn trim(&self) -> &str {
1327 UnicodeStr::trim(self[])
1330 /// Returns a string with leading whitespace removed.
1332 fn trim_left(&self) -> &str {
1333 UnicodeStr::trim_left(self[])
1336 /// Returns a string with trailing whitespace removed.
1338 fn trim_right(&self) -> &str {
1339 UnicodeStr::trim_right(self[])
// Intentionally empty impl: nothing is overridden for `str` here.
// NOTE(review): presumably every `StrExt` method has a default body — confirm
// against the trait definition.
1343 impl StrExt for str {}
1349 use core::iter::AdditiveIterator;
1350 use super::from_utf8;
1351 use super::Utf8Error;
1356 assert!("" <= "foo");
1357 assert!("foo" <= "foo");
1358 assert!("foo" != "bar");
1363 assert_eq!("".len(), 0u);
1364 assert_eq!("hello world".len(), 11u);
1365 assert_eq!("\x63".len(), 1u);
1366 assert_eq!("\u{a2}".len(), 2u);
1367 assert_eq!("\u{3c0}".len(), 2u);
1368 assert_eq!("\u{2620}".len(), 3u);
1369 assert_eq!("\u{1d11e}".len(), 4u);
1371 assert_eq!("".chars().count(), 0u);
1372 assert_eq!("hello world".chars().count(), 11u);
1373 assert_eq!("\x63".chars().count(), 1u);
1374 assert_eq!("\u{a2}".chars().count(), 1u);
1375 assert_eq!("\u{3c0}".chars().count(), 1u);
1376 assert_eq!("\u{2620}".chars().count(), 1u);
1377 assert_eq!("\u{1d11e}".chars().count(), 1u);
1378 assert_eq!("ประเทศไทย中华Việt Nam".chars().count(), 19u);
1380 assert_eq!("hello".width(false), 10u);
1381 assert_eq!("hello".width(true), 10u);
1382 assert_eq!("\0\0\0\0\0".width(false), 0u);
1383 assert_eq!("\0\0\0\0\0".width(true), 0u);
1384 assert_eq!("".width(false), 0u);
1385 assert_eq!("".width(true), 0u);
1386 assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(false), 4u);
1387 assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(true), 8u);
1392 assert_eq!("hello".find('l'), Some(2u));
1393 assert_eq!("hello".find(|&: c:char| c == 'o'), Some(4u));
1394 assert!("hello".find('x').is_none());
1395 assert!("hello".find(|&: c:char| c == 'x').is_none());
1396 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
1397 assert_eq!("ประเทศไทย中华Việt Nam".find(|&: c: char| c == '华'), Some(30u));
1402 assert_eq!("hello".rfind('l'), Some(3u));
1403 assert_eq!("hello".rfind(|&: c:char| c == 'o'), Some(4u));
1404 assert!("hello".rfind('x').is_none());
1405 assert!("hello".rfind(|&: c:char| c == 'x').is_none());
1406 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
1407 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|&: c: char| c == '华'), Some(30u));
1412 let empty = String::from_str("");
1413 let s: String = empty.chars().collect();
1414 assert_eq!(empty, s);
1415 let data = String::from_str("ประเทศไทย中");
1416 let s: String = data.chars().collect();
1417 assert_eq!(data, s);
1421 fn test_into_bytes() {
1422 let data = String::from_str("asdf");
1423 let buf = data.into_bytes();
1424 assert_eq!(b"asdf", buf);
1428 fn test_find_str() {
1430 assert_eq!("".find_str(""), Some(0u));
1431 assert!("banana".find_str("apple pie").is_none());
1433 let data = "abcabc";
1434 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
1435 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
1436 assert!(data.slice(2u, 4u).find_str("ab").is_none());
1438 let string = "ประเทศไทย中华Việt Nam";
1439 let mut data = String::from_str(string);
1440 data.push_str(string);
1441 assert!(data.find_str("ไท华").is_none());
1442 assert_eq!(data.slice(0u, 43u).find_str(""), Some(0u));
1443 assert_eq!(data.slice(6u, 43u).find_str(""), Some(6u - 6u));
1445 assert_eq!(data.slice(0u, 43u).find_str("ประ"), Some( 0u));
1446 assert_eq!(data.slice(0u, 43u).find_str("ทศไ"), Some(12u));
1447 assert_eq!(data.slice(0u, 43u).find_str("ย中"), Some(24u));
1448 assert_eq!(data.slice(0u, 43u).find_str("iệt"), Some(34u));
1449 assert_eq!(data.slice(0u, 43u).find_str("Nam"), Some(40u));
1451 assert_eq!(data.slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
1452 assert_eq!(data.slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
1453 assert_eq!(data.slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
1454 assert_eq!(data.slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
1455 assert_eq!(data.slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
1459 fn test_slice_chars() {
1460 fn t(a: &str, b: &str, start: uint) {
1461 assert_eq!(a.slice_chars(start, start + b.chars().count()), b);
1464 t("hello", "llo", 2);
1465 t("hello", "el", 1);
1468 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
/// Test shorthand: turns a string slice into an owned `String`.
fn s(x: &str) -> String {
    let owned: String = x.to_string();
    owned
}
1473 macro_rules! test_concat {
1474 ($expected: expr, $string: expr) => {
1476 let s: String = $string.concat();
1477 assert_eq!($expected, s);
1483 fn test_concat_for_different_types() {
1484 test_concat!("ab", vec![s("a"), s("b")]);
1485 test_concat!("ab", vec!["a", "b"]);
1486 test_concat!("ab", vec!["a", "b"].as_slice());
1487 test_concat!("ab", vec![s("a"), s("b")]);
1491 fn test_concat_for_different_lengths() {
1492 let empty: &[&str] = &[];
1493 test_concat!("", empty);
1494 test_concat!("a", ["a"]);
1495 test_concat!("ab", ["a", "b"]);
1496 test_concat!("abc", ["", "a", "bc"]);
1499 macro_rules! test_connect {
1500 ($expected: expr, $string: expr, $delim: expr) => {
1502 let s = $string.connect($delim);
1503 assert_eq!($expected, s);
1509 fn test_connect_for_different_types() {
1510 test_connect!("a-b", ["a", "b"], "-");
1511 let hyphen = "-".to_string();
1512 test_connect!("a-b", [s("a"), s("b")], hyphen.as_slice());
1513 test_connect!("a-b", vec!["a", "b"], hyphen.as_slice());
1514 test_connect!("a-b", vec!["a", "b"].as_slice(), "-");
1515 test_connect!("a-b", vec![s("a"), s("b")], "-");
1519 fn test_connect_for_different_lengths() {
1520 let empty: &[&str] = &[];
1521 test_connect!("", empty, "-");
1522 test_connect!("a", ["a"], "-");
1523 test_connect!("a-b", ["a", "b"], "-");
1524 test_connect!("-a-bc", ["", "a", "bc"], "-");
1528 fn test_unsafe_slice() {
1529 assert_eq!("ab", unsafe {"abc".slice_unchecked(0, 2)});
1530 assert_eq!("bc", unsafe {"abc".slice_unchecked(1, 3)});
1531 assert_eq!("", unsafe {"abc".slice_unchecked(1, 1)});
1532 fn a_million_letter_a() -> String {
1534 let mut rs = String::new();
1536 rs.push_str("aaaaaaaaaa");
1541 fn half_a_million_letter_a() -> String {
1543 let mut rs = String::new();
1545 rs.push_str("aaaaa");
1550 let letters = a_million_letter_a();
1551 assert!(half_a_million_letter_a() ==
1552 unsafe {String::from_str(letters.slice_unchecked(
/// Table-driven check of `starts_with`, including empty and non-ASCII inputs.
fn test_starts_with() {
    // (haystack, prefix, expected result)
    let cases = [
        ("", "", true),
        ("abc", "", true),
        ("abc", "a", true),
        ("a", "abc", false),
        ("", "abc", false),
        ("ödd", "-", false),
        ("ödd", "öd", true),
    ];
    for &(haystack, prefix, expected) in cases.iter() {
        assert_eq!(haystack.starts_with(prefix), expected);
    }
}
/// Table-driven check of `ends_with`, including empty and non-ASCII inputs.
fn test_ends_with() {
    // (haystack, suffix, expected result)
    let cases = [
        ("", "", true),
        ("abc", "", true),
        ("abc", "c", true),
        ("a", "abc", false),
        ("", "abc", false),
        ("ddö", "-", false),
        ("ddö", "dö", true),
    ];
    for &(haystack, suffix, expected) in cases.iter() {
        assert_eq!(haystack.ends_with(suffix), expected);
    }
}
/// Only the empty string reports itself as empty.
fn test_is_empty() {
    // (text, expected result)
    let cases = [("", true), ("a", false)];
    for &(text, expected) in cases.iter() {
        assert_eq!(text.is_empty(), expected);
    }
}
1588 assert_eq!("".replace(a, "b"), String::from_str(""));
1589 assert_eq!("a".replace(a, "b"), String::from_str("b"));
1590 assert_eq!("ab".replace(a, "b"), String::from_str("bb"));
1592 assert!(" test test ".replace(test, "toast") ==
1593 String::from_str(" toast toast "));
1594 assert_eq!(" test test ".replace(test, ""), String::from_str(" "));
1598 fn test_replace_2a() {
1599 let data = "ประเทศไทย中华";
1600 let repl = "دولة الكويت";
1603 let a2 = "دولة الكويتทศไทย中华";
1604 assert_eq!(data.replace(a, repl), a2);
1608 fn test_replace_2b() {
1609 let data = "ประเทศไทย中华";
1610 let repl = "دولة الكويت";
1613 let b2 = "ปรدولة الكويتทศไทย中华";
1614 assert_eq!(data.replace(b, repl), b2);
1618 fn test_replace_2c() {
1619 let data = "ประเทศไทย中华";
1620 let repl = "دولة الكويت";
1623 let c2 = "ประเทศไทยدولة الكويت";
1624 assert_eq!(data.replace(c, repl), c2);
1628 fn test_replace_2d() {
1629 let data = "ประเทศไทย中华";
1630 let repl = "دولة الكويت";
1633 assert_eq!(data.replace(d, repl), data);
1638 assert_eq!("ab", "abc".slice(0, 2));
1639 assert_eq!("bc", "abc".slice(1, 3));
1640 assert_eq!("", "abc".slice(1, 1));
1641 assert_eq!("\u{65e5}", "\u{65e5}\u{672c}".slice(0, 3));
1643 let data = "ประเทศไทย中华";
1644 assert_eq!("ป", data.slice(0, 3));
1645 assert_eq!("ร", data.slice(3, 6));
1646 assert_eq!("", data.slice(3, 3));
1647 assert_eq!("华", data.slice(30, 33));
1649 fn a_million_letter_x() -> String {
1651 let mut rs = String::new();
1653 rs.push_str("华华华华华华华华华华");
1658 fn half_a_million_letter_x() -> String {
1660 let mut rs = String::new();
1662 rs.push_str("华华华华华");
1667 let letters = a_million_letter_x();
1668 assert!(half_a_million_letter_x() ==
1669 String::from_str(letters.slice(0u, 3u * 500000u)));
1674 let ss = "中华Việt Nam";
1676 assert_eq!("华", ss.slice(3u, 6u));
1677 assert_eq!("Việt Nam", ss.slice(6u, 16u));
1679 assert_eq!("ab", "abc".slice(0u, 2u));
1680 assert_eq!("bc", "abc".slice(1u, 3u));
1681 assert_eq!("", "abc".slice(1u, 1u));
1683 assert_eq!("中", ss.slice(0u, 3u));
1684 assert_eq!("华V", ss.slice(3u, 7u));
1685 assert_eq!("", ss.slice(3u, 3u));
1700 fn test_slice_fail() {
1701 "中华Việt Nam".slice(0u, 2u);
1705 fn test_slice_from() {
1706 assert_eq!("abcd".slice_from(0), "abcd");
1707 assert_eq!("abcd".slice_from(2), "cd");
1708 assert_eq!("abcd".slice_from(4), "");
1711 fn test_slice_to() {
1712 assert_eq!("abcd".slice_to(0), "");
1713 assert_eq!("abcd".slice_to(2), "ab");
1714 assert_eq!("abcd".slice_to(4), "abcd");
1718 fn test_trim_left_matches() {
1719 let v: &[char] = &[];
1720 assert_eq!(" *** foo *** ".trim_left_matches(v), " *** foo *** ");
1721 let chars: &[char] = &['*', ' '];
1722 assert_eq!(" *** foo *** ".trim_left_matches(chars), "foo *** ");
1723 assert_eq!(" *** *** ".trim_left_matches(chars), "");
1724 assert_eq!("foo *** ".trim_left_matches(chars), "foo *** ");
1726 assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
1727 let chars: &[char] = &['1', '2'];
1728 assert_eq!("12foo1bar12".trim_left_matches(chars), "foo1bar12");
1729 assert_eq!("123foo1bar123".trim_left_matches(|&: c: char| c.is_numeric()), "foo1bar123");
1733 fn test_trim_right_matches() {
1734 let v: &[char] = &[];
1735 assert_eq!(" *** foo *** ".trim_right_matches(v), " *** foo *** ");
1736 let chars: &[char] = &['*', ' '];
1737 assert_eq!(" *** foo *** ".trim_right_matches(chars), " *** foo");
1738 assert_eq!(" *** *** ".trim_right_matches(chars), "");
1739 assert_eq!(" *** foo".trim_right_matches(chars), " *** foo");
1741 assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
1742 let chars: &[char] = &['1', '2'];
1743 assert_eq!("12foo1bar12".trim_right_matches(chars), "12foo1bar");
1744 assert_eq!("123foo1bar123".trim_right_matches(|&: c: char| c.is_numeric()), "123foo1bar");
1748 fn test_trim_matches() {
1749 let v: &[char] = &[];
1750 assert_eq!(" *** foo *** ".trim_matches(v), " *** foo *** ");
1751 let chars: &[char] = &['*', ' '];
1752 assert_eq!(" *** foo *** ".trim_matches(chars), "foo");
1753 assert_eq!(" *** *** ".trim_matches(chars), "");
1754 assert_eq!("foo".trim_matches(chars), "foo");
1756 assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
1757 let chars: &[char] = &['1', '2'];
1758 assert_eq!("12foo1bar12".trim_matches(chars), "foo1bar");
1759 assert_eq!("123foo1bar123".trim_matches(|&: c: char| c.is_numeric()), "foo1bar");
/// `trim_left` removes leading whitespace only (including U+3000).
fn test_trim_left() {
    // (input, expected result)
    let cases = [
        ("", ""),
        ("a", "a"),
        (" ", ""),
        (" blah", "blah"),
        (" \u{3000} wut", "wut"),
        ("hey ", "hey "),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(input.trim_left(), expected);
    }
}
/// `trim_right` removes trailing whitespace only (including U+3000).
fn test_trim_right() {
    // (input, expected result)
    let cases = [
        ("", ""),
        ("a", "a"),
        (" ", ""),
        ("blah ", "blah"),
        ("wut \u{3000} ", "wut"),
        (" hey", " hey"),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(input.trim_right(), expected);
    }
}
1784 assert_eq!("".trim(), "");
1785 assert_eq!("a".trim(), "a");
1786 assert_eq!(" ".trim(), "");
1787 assert_eq!(" blah ".trim(), "blah");
1788 assert_eq!("\nwut \u{3000} ".trim(), "wut");
1789 assert_eq!(" hey dude ".trim(), "hey dude");
/// Checks whether every character of a string is whitespace.
fn test_is_whitespace() {
    // (text, whether all of its characters are whitespace)
    let cases = [
        ("", true),
        (" ", true),
        ("\u{2009}", true), // Thin space
        (" \n\t ", true),
        (" _ ", false),
    ];
    for &(text, all_whitespace) in cases.iter() {
        assert_eq!(text.chars().all(|c| c.is_whitespace()), all_whitespace);
    }
}
1802 fn test_slice_shift_char() {
1803 let data = "ประเทศไทย中";
1804 assert_eq!(data.slice_shift_char(), Some(('ป', "ระเทศไทย中")));
1808 fn test_slice_shift_char_2() {
1810 assert_eq!(empty.slice_shift_char(), None);
1815 // deny overlong encodings
1816 assert!(from_utf8(&[0xc0, 0x80]).is_err());
1817 assert!(from_utf8(&[0xc0, 0xae]).is_err());
1818 assert!(from_utf8(&[0xe0, 0x80, 0x80]).is_err());
1819 assert!(from_utf8(&[0xe0, 0x80, 0xaf]).is_err());
1820 assert!(from_utf8(&[0xe0, 0x81, 0x81]).is_err());
1821 assert!(from_utf8(&[0xf0, 0x82, 0x82, 0xac]).is_err());
1822 assert!(from_utf8(&[0xf4, 0x90, 0x80, 0x80]).is_err());
1825 assert!(from_utf8(&[0xED, 0xA0, 0x80]).is_err());
1826 assert!(from_utf8(&[0xED, 0xBF, 0xBF]).is_err());
1828 assert!(from_utf8(&[0xC2, 0x80]).is_ok());
1829 assert!(from_utf8(&[0xDF, 0xBF]).is_ok());
1830 assert!(from_utf8(&[0xE0, 0xA0, 0x80]).is_ok());
1831 assert!(from_utf8(&[0xED, 0x9F, 0xBF]).is_ok());
1832 assert!(from_utf8(&[0xEE, 0x80, 0x80]).is_ok());
1833 assert!(from_utf8(&[0xEF, 0xBF, 0xBF]).is_ok());
1834 assert!(from_utf8(&[0xF0, 0x90, 0x80, 0x80]).is_ok());
1835 assert!(from_utf8(&[0xF4, 0x8F, 0xBF, 0xBF]).is_ok());
1839 fn test_is_utf16() {
1840 use unicode::str::is_utf16;
1841 macro_rules! pos ( ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } });
1849 // surrogate pairs (randomly generated with Python 3's
1850 // .encode('utf-16be'))
1851 pos!(&[0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
1852 &[0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
1853 &[0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);
1855 // mixtures (also random)
1856 pos!(&[0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
1857 &[0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
1858 &[0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);
1861 macro_rules! neg ( ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } });
1864 // surrogate + regular unit
1866 // surrogate + lead surrogate
1868 // unterminated surrogate
1870 // trail surrogate without a lead
1873 // random byte sequences that Python 3's .decode('utf-16be')
1875 neg!(&[0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
1876 &[0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
1877 &[0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
1878 &[0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
1879 &[0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
1880 &[0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
1881 &[0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
1882 &[0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
1883 &[0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
1884 &[0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
1885 &[0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
1886 &[0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
1887 &[0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
1888 &[0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
1889 &[0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
1890 &[0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
1891 &[0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
1892 &[0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
1893 &[0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
1894 &[0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
1895 &[0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
1899 fn test_as_bytes() {
1902 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1903 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1907 assert_eq!("".as_bytes(), b);
1908 assert_eq!("abc".as_bytes(), b"abc");
1909 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
1914 fn test_as_bytes_fail() {
1915 // Don't double free. (I'm not sure if this exercises the
1916 // original problem code path anymore.)
1917 let s = String::from_str("");
1918 let _bytes = s.as_bytes();
1924 let buf = "hello".as_ptr();
1926 assert_eq!(*buf.offset(0), b'h');
1927 assert_eq!(*buf.offset(1), b'e');
1928 assert_eq!(*buf.offset(2), b'l');
1929 assert_eq!(*buf.offset(3), b'l');
1930 assert_eq!(*buf.offset(4), b'o');
1935 fn test_subslice_offset() {
1936 let a = "kernelsprite";
1937 let b = a.slice(7, a.len());
1938 let c = a.slice(0, a.len() - 6);
1939 assert_eq!(a.subslice_offset(b), 7);
1940 assert_eq!(a.subslice_offset(c), 0);
1942 let string = "a\nb\nc";
1943 let lines: Vec<&str> = string.lines().collect();
1944 assert_eq!(string.subslice_offset(lines[0]), 0);
1945 assert_eq!(string.subslice_offset(lines[1]), 2);
1946 assert_eq!(string.subslice_offset(lines[2]), 4);
1951 fn test_subslice_offset_2() {
1952 let a = "alchemiter";
1953 let b = "cruxtruder";
1954 a.subslice_offset(b);
1958 fn vec_str_conversions() {
1959 let s1: String = String::from_str("All mimsy were the borogoves");
1961 let v: Vec<u8> = s1.as_bytes().to_vec();
1962 let s2: String = String::from_str(from_utf8(v.as_slice()).unwrap());
1963 let mut i: uint = 0u;
1964 let n1: uint = s1.len();
1965 let n2: uint = v.len();
1968 let a: u8 = s1.as_bytes()[i];
1969 let b: u8 = s2.as_bytes()[i];
1978 fn test_contains() {
1979 assert!("abcde".contains("bcd"));
1980 assert!("abcde".contains("abcd"));
1981 assert!("abcde".contains("bcde"));
1982 assert!("abcde".contains(""));
1983 assert!("".contains(""));
1984 assert!(!"abcde".contains("def"));
1985 assert!(!"".contains("a"));
1987 let data = "ประเทศไทย中华Việt Nam";
1988 assert!(data.contains("ประเ"));
1989 assert!(data.contains("ะเ"));
1990 assert!(data.contains("中华"));
1991 assert!(!data.contains("ไท华"));
1995 fn test_contains_char() {
1996 assert!("abc".contains_char('b'));
1997 assert!("a".contains_char('a'));
1998 assert!(!"abc".contains_char('d'));
1999 assert!(!"".contains_char('a'));
2004 let s = "ศไทย中华Việt Nam";
2005 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2007 for ch in v.iter() {
2008 assert!(s.char_at(pos) == *ch);
2009 pos += ch.to_string().len();
2014 fn test_char_at_reverse() {
2015 let s = "ศไทย中华Việt Nam";
2016 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2017 let mut pos = s.len();
2018 for ch in v.iter().rev() {
2019 assert!(s.char_at_reverse(pos) == *ch);
2020 pos -= ch.to_string().len();
2025 fn test_escape_unicode() {
2026 assert_eq!("abc".escape_unicode(),
2027 String::from_str("\\u{61}\\u{62}\\u{63}"));
2028 assert_eq!("a c".escape_unicode(),
2029 String::from_str("\\u{61}\\u{20}\\u{63}"));
2030 assert_eq!("\r\n\t".escape_unicode(),
2031 String::from_str("\\u{d}\\u{a}\\u{9}"));
2032 assert_eq!("'\"\\".escape_unicode(),
2033 String::from_str("\\u{27}\\u{22}\\u{5c}"));
2034 assert_eq!("\x00\x01\u{fe}\u{ff}".escape_unicode(),
2035 String::from_str("\\u{0}\\u{1}\\u{fe}\\u{ff}"));
2036 assert_eq!("\u{100}\u{ffff}".escape_unicode(),
2037 String::from_str("\\u{100}\\u{ffff}"));
2038 assert_eq!("\u{10000}\u{10ffff}".escape_unicode(),
2039 String::from_str("\\u{10000}\\u{10ffff}"));
2040 assert_eq!("ab\u{fb00}".escape_unicode(),
2041 String::from_str("\\u{61}\\u{62}\\u{fb00}"));
2042 assert_eq!("\u{1d4ea}\r".escape_unicode(),
2043 String::from_str("\\u{1d4ea}\\u{d}"));
2047 fn test_escape_default() {
2048 assert_eq!("abc".escape_default(), String::from_str("abc"));
2049 assert_eq!("a c".escape_default(), String::from_str("a c"));
2050 assert_eq!("\r\n\t".escape_default(), String::from_str("\\r\\n\\t"));
2051 assert_eq!("'\"\\".escape_default(), String::from_str("\\'\\\"\\\\"));
2052 assert_eq!("\u{100}\u{ffff}".escape_default(),
2053 String::from_str("\\u{100}\\u{ffff}"));
2054 assert_eq!("\u{10000}\u{10ffff}".escape_default(),
2055 String::from_str("\\u{10000}\\u{10ffff}"));
2056 assert_eq!("ab\u{fb00}".escape_default(),
2057 String::from_str("ab\\u{fb00}"));
2058 assert_eq!("\u{1d4ea}\r".escape_default(),
2059 String::from_str("\\u{1d4ea}\\r"));
/// Checks the total ordering of string slices via `cmp`.
///
/// Ordering is lexicographic by bytes: a strict prefix sorts before the longer
/// string, and otherwise the first differing byte decides, regardless of
/// length.
fn test_total_ord() {
    // Each comparison is asserted; as bare `==` expression statements the
    // results were discarded and the test could never fail.
    assert_eq!("1234".cmp("123"), Greater);
    assert_eq!("123".cmp("1234"), Less);
    assert_eq!("1234".cmp("1234"), Equal);
    // First difference is '5' vs '6' at index 5, so the longer string is Less.
    assert_eq!("12345555".cmp("123456"), Less);
    // First difference is '2' vs '1' at index 0, so the shorter string is Greater.
    assert_eq!("22".cmp("1234"), Greater);
}
2072 fn test_char_range_at() {
2073 let data = "b¢€𤭢𤭢€¢b";
2074 assert_eq!('b', data.char_range_at(0).ch);
2075 assert_eq!('¢', data.char_range_at(1).ch);
2076 assert_eq!('€', data.char_range_at(3).ch);
2077 assert_eq!('𤭢', data.char_range_at(6).ch);
2078 assert_eq!('𤭢', data.char_range_at(10).ch);
2079 assert_eq!('€', data.char_range_at(14).ch);
2080 assert_eq!('¢', data.char_range_at(17).ch);
2081 assert_eq!('b', data.char_range_at(19).ch);
2085 fn test_char_range_at_reverse_underflow() {
2086 assert_eq!("abc".char_range_at_reverse(0).next, 0);
2090 fn test_iterator() {
2091 let s = "ศไทย中华Việt Nam";
2092 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2095 let mut it = s.chars();
2098 assert_eq!(c, v[pos]);
2101 assert_eq!(pos, v.len());
2105 fn test_rev_iterator() {
2106 let s = "ศไทย中华Việt Nam";
2107 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2110 let mut it = s.chars().rev();
2113 assert_eq!(c, v[pos]);
2116 assert_eq!(pos, v.len());
2120 fn test_chars_decoding() {
2121 let mut bytes = [0u8; 4];
2122 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2123 let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2124 let s = ::core::str::from_utf8(bytes[..len]).unwrap();
2125 if Some(c) != s.chars().next() {
2126 panic!("character {:x}={} does not decode correctly", c as u32, c);
2132 fn test_chars_rev_decoding() {
2133 let mut bytes = [0u8; 4];
2134 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2135 let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2136 let s = ::core::str::from_utf8(bytes[..len]).unwrap();
2137 if Some(c) != s.chars().rev().next() {
2138 panic!("character {:x}={} does not decode correctly", c as u32, c);
2144 fn test_iterator_clone() {
2145 let s = "ศไทย中华Việt Nam";
2146 let mut it = s.chars();
2148 assert!(it.zip(it.clone()).all(|(x,y)| x == y));
2152 fn test_bytesator() {
2153 let s = "ศไทย中华Việt Nam";
2155 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2156 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2161 for b in s.bytes() {
2162 assert_eq!(b, v[pos]);
2168 fn test_bytes_revator() {
2169 let s = "ศไทย中华Việt Nam";
2171 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2172 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2175 let mut pos = v.len();
2177 for b in s.bytes().rev() {
2179 assert_eq!(b, v[pos]);
2184 fn test_char_indicesator() {
2185 let s = "ศไทย中华Việt Nam";
2186 let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
2187 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2190 let mut it = s.char_indices();
2193 assert_eq!(c, (p[pos], v[pos]));
2196 assert_eq!(pos, v.len());
2197 assert_eq!(pos, p.len());
2201 fn test_char_indices_revator() {
2202 let s = "ศไทย中华Việt Nam";
2203 let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
2204 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2207 let mut it = s.char_indices().rev();
2210 assert_eq!(c, (p[pos], v[pos]));
2213 assert_eq!(pos, v.len());
2214 assert_eq!(pos, p.len());
2218 fn test_splitn_char_iterator() {
2219 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2221 let split: Vec<&str> = data.splitn(3, ' ').collect();
2222 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2224 let split: Vec<&str> = data.splitn(3, |&: c: char| c == ' ').collect();
2225 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2228 let split: Vec<&str> = data.splitn(3, 'ä').collect();
2229 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
2231 let split: Vec<&str> = data.splitn(3, |&: c: char| c == 'ä').collect();
2232 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
2236 fn test_split_char_iterator_no_trailing() {
2237 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2239 let split: Vec<&str> = data.split('\n').collect();
2240 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
2242 let split: Vec<&str> = data.split_terminator('\n').collect();
2243 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
2248 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
2249 let words: Vec<&str> = data.words().collect();
2250 assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
2254 fn test_nfd_chars() {
2256 ($input: expr, $expected: expr) => {
2257 assert_eq!($input.nfd_chars().collect::<String>(), $expected);
2261 t!("\u{1e0b}\u{1c4}", "d\u{307}\u{1c4}");
2262 t!("\u{2026}", "\u{2026}");
2263 t!("\u{2126}", "\u{3a9}");
2264 t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
2265 t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
2266 t!("a\u{301}", "a\u{301}");
2267 t!("\u{301}a", "\u{301}a");
2268 t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
2269 t!("\u{ac1c}", "\u{1100}\u{1162}");
2273 fn test_nfkd_chars() {
2275 ($input: expr, $expected: expr) => {
2276 assert_eq!($input.nfkd_chars().collect::<String>(), $expected);
2280 t!("\u{1e0b}\u{1c4}", "d\u{307}DZ\u{30c}");
2281 t!("\u{2026}", "...");
2282 t!("\u{2126}", "\u{3a9}");
2283 t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
2284 t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
2285 t!("a\u{301}", "a\u{301}");
2286 t!("\u{301}a", "\u{301}a");
2287 t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
2288 t!("\u{ac1c}", "\u{1100}\u{1162}");
2292 fn test_nfc_chars() {
2294 ($input: expr, $expected: expr) => {
2295 assert_eq!($input.nfc_chars().collect::<String>(), $expected);
2299 t!("\u{1e0b}\u{1c4}", "\u{1e0b}\u{1c4}");
2300 t!("\u{2026}", "\u{2026}");
2301 t!("\u{2126}", "\u{3a9}");
2302 t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
2303 t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
2304 t!("a\u{301}", "\u{e1}");
2305 t!("\u{301}a", "\u{301}a");
2306 t!("\u{d4db}", "\u{d4db}");
2307 t!("\u{ac1c}", "\u{ac1c}");
2308 t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
2312 fn test_nfkc_chars() {
2314 ($input: expr, $expected: expr) => {
2315 assert_eq!($input.nfkc_chars().collect::<String>(), $expected);
2319 t!("\u{1e0b}\u{1c4}", "\u{1e0b}D\u{17d}");
2320 t!("\u{2026}", "...");
2321 t!("\u{2126}", "\u{3a9}");
2322 t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
2323 t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
2324 t!("a\u{301}", "\u{e1}");
2325 t!("\u{301}a", "\u{301}a");
2326 t!("\u{d4db}", "\u{d4db}");
2327 t!("\u{ac1c}", "\u{ac1c}");
2328 t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
2333 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
2334 let lines: Vec<&str> = data.lines().collect();
2335 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2337 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
2338 let lines: Vec<&str> = data.lines().collect();
2339 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2343 fn test_graphemes() {
2344 use core::iter::order;
2345 // official Unicode test data
2346 // from http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
2347 let test_same: [(_, &[_]); 325] = [
2348 ("\u{20}\u{20}", &["\u{20}", "\u{20}"]),
2349 ("\u{20}\u{308}\u{20}", &["\u{20}\u{308}", "\u{20}"]),
2350 ("\u{20}\u{D}", &["\u{20}", "\u{D}"]),
2351 ("\u{20}\u{308}\u{D}", &["\u{20}\u{308}", "\u{D}"]),
2352 ("\u{20}\u{A}", &["\u{20}", "\u{A}"]),
2353 ("\u{20}\u{308}\u{A}", &["\u{20}\u{308}", "\u{A}"]),
2354 ("\u{20}\u{1}", &["\u{20}", "\u{1}"]),
2355 ("\u{20}\u{308}\u{1}", &["\u{20}\u{308}", "\u{1}"]),
2356 ("\u{20}\u{300}", &["\u{20}\u{300}"]),
2357 ("\u{20}\u{308}\u{300}", &["\u{20}\u{308}\u{300}"]),
2358 ("\u{20}\u{1100}", &["\u{20}", "\u{1100}"]),
2359 ("\u{20}\u{308}\u{1100}", &["\u{20}\u{308}", "\u{1100}"]),
2360 ("\u{20}\u{1160}", &["\u{20}", "\u{1160}"]),
2361 ("\u{20}\u{308}\u{1160}", &["\u{20}\u{308}", "\u{1160}"]),
2362 ("\u{20}\u{11A8}", &["\u{20}", "\u{11A8}"]),
2363 ("\u{20}\u{308}\u{11A8}", &["\u{20}\u{308}", "\u{11A8}"]),
2364 ("\u{20}\u{AC00}", &["\u{20}", "\u{AC00}"]),
2365 ("\u{20}\u{308}\u{AC00}", &["\u{20}\u{308}", "\u{AC00}"]),
2366 ("\u{20}\u{AC01}", &["\u{20}", "\u{AC01}"]),
2367 ("\u{20}\u{308}\u{AC01}", &["\u{20}\u{308}", "\u{AC01}"]),
2368 ("\u{20}\u{1F1E6}", &["\u{20}", "\u{1F1E6}"]),
2369 ("\u{20}\u{308}\u{1F1E6}", &["\u{20}\u{308}", "\u{1F1E6}"]),
2370 ("\u{20}\u{378}", &["\u{20}", "\u{378}"]),
2371 ("\u{20}\u{308}\u{378}", &["\u{20}\u{308}", "\u{378}"]),
2372 ("\u{D}\u{20}", &["\u{D}", "\u{20}"]),
2373 ("\u{D}\u{308}\u{20}", &["\u{D}", "\u{308}", "\u{20}"]),
2374 ("\u{D}\u{D}", &["\u{D}", "\u{D}"]),
2375 ("\u{D}\u{308}\u{D}", &["\u{D}", "\u{308}", "\u{D}"]),
2376 ("\u{D}\u{A}", &["\u{D}\u{A}"]),
2377 ("\u{D}\u{308}\u{A}", &["\u{D}", "\u{308}", "\u{A}"]),
2378 ("\u{D}\u{1}", &["\u{D}", "\u{1}"]),
2379 ("\u{D}\u{308}\u{1}", &["\u{D}", "\u{308}", "\u{1}"]),
2380 ("\u{D}\u{300}", &["\u{D}", "\u{300}"]),
2381 ("\u{D}\u{308}\u{300}", &["\u{D}", "\u{308}\u{300}"]),
2382 ("\u{D}\u{903}", &["\u{D}", "\u{903}"]),
2383 ("\u{D}\u{1100}", &["\u{D}", "\u{1100}"]),
2384 ("\u{D}\u{308}\u{1100}", &["\u{D}", "\u{308}", "\u{1100}"]),
2385 ("\u{D}\u{1160}", &["\u{D}", "\u{1160}"]),
2386 ("\u{D}\u{308}\u{1160}", &["\u{D}", "\u{308}", "\u{1160}"]),
2387 ("\u{D}\u{11A8}", &["\u{D}", "\u{11A8}"]),
2388 ("\u{D}\u{308}\u{11A8}", &["\u{D}", "\u{308}", "\u{11A8}"]),
2389 ("\u{D}\u{AC00}", &["\u{D}", "\u{AC00}"]),
2390 ("\u{D}\u{308}\u{AC00}", &["\u{D}", "\u{308}", "\u{AC00}"]),
2391 ("\u{D}\u{AC01}", &["\u{D}", "\u{AC01}"]),
2392 ("\u{D}\u{308}\u{AC01}", &["\u{D}", "\u{308}", "\u{AC01}"]),
2393 ("\u{D}\u{1F1E6}", &["\u{D}", "\u{1F1E6}"]),
2394 ("\u{D}\u{308}\u{1F1E6}", &["\u{D}", "\u{308}", "\u{1F1E6}"]),
2395 ("\u{D}\u{378}", &["\u{D}", "\u{378}"]),
2396 ("\u{D}\u{308}\u{378}", &["\u{D}", "\u{308}", "\u{378}"]),
2397 ("\u{A}\u{20}", &["\u{A}", "\u{20}"]),
2398 ("\u{A}\u{308}\u{20}", &["\u{A}", "\u{308}", "\u{20}"]),
2399 ("\u{A}\u{D}", &["\u{A}", "\u{D}"]),
2400 ("\u{A}\u{308}\u{D}", &["\u{A}", "\u{308}", "\u{D}"]),
2401 ("\u{A}\u{A}", &["\u{A}", "\u{A}"]),
2402 ("\u{A}\u{308}\u{A}", &["\u{A}", "\u{308}", "\u{A}"]),
2403 ("\u{A}\u{1}", &["\u{A}", "\u{1}"]),
2404 ("\u{A}\u{308}\u{1}", &["\u{A}", "\u{308}", "\u{1}"]),
2405 ("\u{A}\u{300}", &["\u{A}", "\u{300}"]),
2406 ("\u{A}\u{308}\u{300}", &["\u{A}", "\u{308}\u{300}"]),
2407 ("\u{A}\u{903}", &["\u{A}", "\u{903}"]),
2408 ("\u{A}\u{1100}", &["\u{A}", "\u{1100}"]),
2409 ("\u{A}\u{308}\u{1100}", &["\u{A}", "\u{308}", "\u{1100}"]),
2410 ("\u{A}\u{1160}", &["\u{A}", "\u{1160}"]),
2411 ("\u{A}\u{308}\u{1160}", &["\u{A}", "\u{308}", "\u{1160}"]),
2412 ("\u{A}\u{11A8}", &["\u{A}", "\u{11A8}"]),
2413 ("\u{A}\u{308}\u{11A8}", &["\u{A}", "\u{308}", "\u{11A8}"]),
2414 ("\u{A}\u{AC00}", &["\u{A}", "\u{AC00}"]),
2415 ("\u{A}\u{308}\u{AC00}", &["\u{A}", "\u{308}", "\u{AC00}"]),
2416 ("\u{A}\u{AC01}", &["\u{A}", "\u{AC01}"]),
2417 ("\u{A}\u{308}\u{AC01}", &["\u{A}", "\u{308}", "\u{AC01}"]),
2418 ("\u{A}\u{1F1E6}", &["\u{A}", "\u{1F1E6}"]),
2419 ("\u{A}\u{308}\u{1F1E6}", &["\u{A}", "\u{308}", "\u{1F1E6}"]),
2420 ("\u{A}\u{378}", &["\u{A}", "\u{378}"]),
2421 ("\u{A}\u{308}\u{378}", &["\u{A}", "\u{308}", "\u{378}"]),
2422 ("\u{1}\u{20}", &["\u{1}", "\u{20}"]),
2423 ("\u{1}\u{308}\u{20}", &["\u{1}", "\u{308}", "\u{20}"]),
2424 ("\u{1}\u{D}", &["\u{1}", "\u{D}"]),
2425 ("\u{1}\u{308}\u{D}", &["\u{1}", "\u{308}", "\u{D}"]),
2426 ("\u{1}\u{A}", &["\u{1}", "\u{A}"]),
2427 ("\u{1}\u{308}\u{A}", &["\u{1}", "\u{308}", "\u{A}"]),
2428 ("\u{1}\u{1}", &["\u{1}", "\u{1}"]),
2429 ("\u{1}\u{308}\u{1}", &["\u{1}", "\u{308}", "\u{1}"]),
2430 ("\u{1}\u{300}", &["\u{1}", "\u{300}"]),
2431 ("\u{1}\u{308}\u{300}", &["\u{1}", "\u{308}\u{300}"]),
2432 ("\u{1}\u{903}", &["\u{1}", "\u{903}"]),
2433 ("\u{1}\u{1100}", &["\u{1}", "\u{1100}"]),
2434 ("\u{1}\u{308}\u{1100}", &["\u{1}", "\u{308}", "\u{1100}"]),
2435 ("\u{1}\u{1160}", &["\u{1}", "\u{1160}"]),
2436 ("\u{1}\u{308}\u{1160}", &["\u{1}", "\u{308}", "\u{1160}"]),
2437 ("\u{1}\u{11A8}", &["\u{1}", "\u{11A8}"]),
2438 ("\u{1}\u{308}\u{11A8}", &["\u{1}", "\u{308}", "\u{11A8}"]),
2439 ("\u{1}\u{AC00}", &["\u{1}", "\u{AC00}"]),
2440 ("\u{1}\u{308}\u{AC00}", &["\u{1}", "\u{308}", "\u{AC00}"]),
2441 ("\u{1}\u{AC01}", &["\u{1}", "\u{AC01}"]),
2442 ("\u{1}\u{308}\u{AC01}", &["\u{1}", "\u{308}", "\u{AC01}"]),
2443 ("\u{1}\u{1F1E6}", &["\u{1}", "\u{1F1E6}"]),
2444 ("\u{1}\u{308}\u{1F1E6}", &["\u{1}", "\u{308}", "\u{1F1E6}"]),
2445 ("\u{1}\u{378}", &["\u{1}", "\u{378}"]),
2446 ("\u{1}\u{308}\u{378}", &["\u{1}", "\u{308}", "\u{378}"]),
2447 ("\u{300}\u{20}", &["\u{300}", "\u{20}"]),
2448 ("\u{300}\u{308}\u{20}", &["\u{300}\u{308}", "\u{20}"]),
2449 ("\u{300}\u{D}", &["\u{300}", "\u{D}"]),
2450 ("\u{300}\u{308}\u{D}", &["\u{300}\u{308}", "\u{D}"]),
2451 ("\u{300}\u{A}", &["\u{300}", "\u{A}"]),
2452 ("\u{300}\u{308}\u{A}", &["\u{300}\u{308}", "\u{A}"]),
2453 ("\u{300}\u{1}", &["\u{300}", "\u{1}"]),
2454 ("\u{300}\u{308}\u{1}", &["\u{300}\u{308}", "\u{1}"]),
2455 ("\u{300}\u{300}", &["\u{300}\u{300}"]),
2456 ("\u{300}\u{308}\u{300}", &["\u{300}\u{308}\u{300}"]),
2457 ("\u{300}\u{1100}", &["\u{300}", "\u{1100}"]),
2458 ("\u{300}\u{308}\u{1100}", &["\u{300}\u{308}", "\u{1100}"]),
2459 ("\u{300}\u{1160}", &["\u{300}", "\u{1160}"]),
2460 ("\u{300}\u{308}\u{1160}", &["\u{300}\u{308}", "\u{1160}"]),
2461 ("\u{300}\u{11A8}", &["\u{300}", "\u{11A8}"]),
2462 ("\u{300}\u{308}\u{11A8}", &["\u{300}\u{308}", "\u{11A8}"]),
2463 ("\u{300}\u{AC00}", &["\u{300}", "\u{AC00}"]),
2464 ("\u{300}\u{308}\u{AC00}", &["\u{300}\u{308}", "\u{AC00}"]),
2465 ("\u{300}\u{AC01}", &["\u{300}", "\u{AC01}"]),
2466 ("\u{300}\u{308}\u{AC01}", &["\u{300}\u{308}", "\u{AC01}"]),
2467 ("\u{300}\u{1F1E6}", &["\u{300}", "\u{1F1E6}"]),
2468 ("\u{300}\u{308}\u{1F1E6}", &["\u{300}\u{308}", "\u{1F1E6}"]),
2469 ("\u{300}\u{378}", &["\u{300}", "\u{378}"]),
2470 ("\u{300}\u{308}\u{378}", &["\u{300}\u{308}", "\u{378}"]),
2471 ("\u{903}\u{20}", &["\u{903}", "\u{20}"]),
2472 ("\u{903}\u{308}\u{20}", &["\u{903}\u{308}", "\u{20}"]),
2473 ("\u{903}\u{D}", &["\u{903}", "\u{D}"]),
2474 ("\u{903}\u{308}\u{D}", &["\u{903}\u{308}", "\u{D}"]),
2475 ("\u{903}\u{A}", &["\u{903}", "\u{A}"]),
2476 ("\u{903}\u{308}\u{A}", &["\u{903}\u{308}", "\u{A}"]),
2477 ("\u{903}\u{1}", &["\u{903}", "\u{1}"]),
2478 ("\u{903}\u{308}\u{1}", &["\u{903}\u{308}", "\u{1}"]),
2479 ("\u{903}\u{300}", &["\u{903}\u{300}"]),
2480 ("\u{903}\u{308}\u{300}", &["\u{903}\u{308}\u{300}"]),
2481 ("\u{903}\u{1100}", &["\u{903}", "\u{1100}"]),
2482 ("\u{903}\u{308}\u{1100}", &["\u{903}\u{308}", "\u{1100}"]),
2483 ("\u{903}\u{1160}", &["\u{903}", "\u{1160}"]),
2484 ("\u{903}\u{308}\u{1160}", &["\u{903}\u{308}", "\u{1160}"]),
2485 ("\u{903}\u{11A8}", &["\u{903}", "\u{11A8}"]),
2486 ("\u{903}\u{308}\u{11A8}", &["\u{903}\u{308}", "\u{11A8}"]),
2487 ("\u{903}\u{AC00}", &["\u{903}", "\u{AC00}"]),
2488 ("\u{903}\u{308}\u{AC00}", &["\u{903}\u{308}", "\u{AC00}"]),
2489 ("\u{903}\u{AC01}", &["\u{903}", "\u{AC01}"]),
2490 ("\u{903}\u{308}\u{AC01}", &["\u{903}\u{308}", "\u{AC01}"]),
2491 ("\u{903}\u{1F1E6}", &["\u{903}", "\u{1F1E6}"]),
2492 ("\u{903}\u{308}\u{1F1E6}", &["\u{903}\u{308}", "\u{1F1E6}"]),
2493 ("\u{903}\u{378}", &["\u{903}", "\u{378}"]),
2494 ("\u{903}\u{308}\u{378}", &["\u{903}\u{308}", "\u{378}"]),
2495 ("\u{1100}\u{20}", &["\u{1100}", "\u{20}"]),
2496 ("\u{1100}\u{308}\u{20}", &["\u{1100}\u{308}", "\u{20}"]),
2497 ("\u{1100}\u{D}", &["\u{1100}", "\u{D}"]),
2498 ("\u{1100}\u{308}\u{D}", &["\u{1100}\u{308}", "\u{D}"]),
2499 ("\u{1100}\u{A}", &["\u{1100}", "\u{A}"]),
2500 ("\u{1100}\u{308}\u{A}", &["\u{1100}\u{308}", "\u{A}"]),
2501 ("\u{1100}\u{1}", &["\u{1100}", "\u{1}"]),
2502 ("\u{1100}\u{308}\u{1}", &["\u{1100}\u{308}", "\u{1}"]),
2503 ("\u{1100}\u{300}", &["\u{1100}\u{300}"]),
2504 ("\u{1100}\u{308}\u{300}", &["\u{1100}\u{308}\u{300}"]),
2505 ("\u{1100}\u{1100}", &["\u{1100}\u{1100}"]),
2506 ("\u{1100}\u{308}\u{1100}", &["\u{1100}\u{308}", "\u{1100}"]),
2507 ("\u{1100}\u{1160}", &["\u{1100}\u{1160}"]),
2508 ("\u{1100}\u{308}\u{1160}", &["\u{1100}\u{308}", "\u{1160}"]),
2509 ("\u{1100}\u{11A8}", &["\u{1100}", "\u{11A8}"]),
2510 ("\u{1100}\u{308}\u{11A8}", &["\u{1100}\u{308}", "\u{11A8}"]),
2511 ("\u{1100}\u{AC00}", &["\u{1100}\u{AC00}"]),
2512 ("\u{1100}\u{308}\u{AC00}", &["\u{1100}\u{308}", "\u{AC00}"]),
2513 ("\u{1100}\u{AC01}", &["\u{1100}\u{AC01}"]),
2514 ("\u{1100}\u{308}\u{AC01}", &["\u{1100}\u{308}", "\u{AC01}"]),
2515 ("\u{1100}\u{1F1E6}", &["\u{1100}", "\u{1F1E6}"]),
2516 ("\u{1100}\u{308}\u{1F1E6}", &["\u{1100}\u{308}", "\u{1F1E6}"]),
2517 ("\u{1100}\u{378}", &["\u{1100}", "\u{378}"]),
2518 ("\u{1100}\u{308}\u{378}", &["\u{1100}\u{308}", "\u{378}"]),
2519 ("\u{1160}\u{20}", &["\u{1160}", "\u{20}"]),
2520 ("\u{1160}\u{308}\u{20}", &["\u{1160}\u{308}", "\u{20}"]),
2521 ("\u{1160}\u{D}", &["\u{1160}", "\u{D}"]),
2522 ("\u{1160}\u{308}\u{D}", &["\u{1160}\u{308}", "\u{D}"]),
2523 ("\u{1160}\u{A}", &["\u{1160}", "\u{A}"]),
2524 ("\u{1160}\u{308}\u{A}", &["\u{1160}\u{308}", "\u{A}"]),
2525 ("\u{1160}\u{1}", &["\u{1160}", "\u{1}"]),
2526 ("\u{1160}\u{308}\u{1}", &["\u{1160}\u{308}", "\u{1}"]),
2527 ("\u{1160}\u{300}", &["\u{1160}\u{300}"]),
2528 ("\u{1160}\u{308}\u{300}", &["\u{1160}\u{308}\u{300}"]),
2529 ("\u{1160}\u{1100}", &["\u{1160}", "\u{1100}"]),
2530 ("\u{1160}\u{308}\u{1100}", &["\u{1160}\u{308}", "\u{1100}"]),
2531 ("\u{1160}\u{1160}", &["\u{1160}\u{1160}"]),
2532 ("\u{1160}\u{308}\u{1160}", &["\u{1160}\u{308}", "\u{1160}"]),
2533 ("\u{1160}\u{11A8}", &["\u{1160}\u{11A8}"]),
2534 ("\u{1160}\u{308}\u{11A8}", &["\u{1160}\u{308}", "\u{11A8}"]),
2535 ("\u{1160}\u{AC00}", &["\u{1160}", "\u{AC00}"]),
2536 ("\u{1160}\u{308}\u{AC00}", &["\u{1160}\u{308}", "\u{AC00}"]),
2537 ("\u{1160}\u{AC01}", &["\u{1160}", "\u{AC01}"]),
2538 ("\u{1160}\u{308}\u{AC01}", &["\u{1160}\u{308}", "\u{AC01}"]),
2539 ("\u{1160}\u{1F1E6}", &["\u{1160}", "\u{1F1E6}"]),
2540 ("\u{1160}\u{308}\u{1F1E6}", &["\u{1160}\u{308}", "\u{1F1E6}"]),
2541 ("\u{1160}\u{378}", &["\u{1160}", "\u{378}"]),
2542 ("\u{1160}\u{308}\u{378}", &["\u{1160}\u{308}", "\u{378}"]),
2543 ("\u{11A8}\u{20}", &["\u{11A8}", "\u{20}"]),
2544 ("\u{11A8}\u{308}\u{20}", &["\u{11A8}\u{308}", "\u{20}"]),
2545 ("\u{11A8}\u{D}", &["\u{11A8}", "\u{D}"]),
2546 ("\u{11A8}\u{308}\u{D}", &["\u{11A8}\u{308}", "\u{D}"]),
2547 ("\u{11A8}\u{A}", &["\u{11A8}", "\u{A}"]),
2548 ("\u{11A8}\u{308}\u{A}", &["\u{11A8}\u{308}", "\u{A}"]),
2549 ("\u{11A8}\u{1}", &["\u{11A8}", "\u{1}"]),
2550 ("\u{11A8}\u{308}\u{1}", &["\u{11A8}\u{308}", "\u{1}"]),
2551 ("\u{11A8}\u{300}", &["\u{11A8}\u{300}"]),
2552 ("\u{11A8}\u{308}\u{300}", &["\u{11A8}\u{308}\u{300}"]),
2553 ("\u{11A8}\u{1100}", &["\u{11A8}", "\u{1100}"]),
2554 ("\u{11A8}\u{308}\u{1100}", &["\u{11A8}\u{308}", "\u{1100}"]),
2555 ("\u{11A8}\u{1160}", &["\u{11A8}", "\u{1160}"]),
2556 ("\u{11A8}\u{308}\u{1160}", &["\u{11A8}\u{308}", "\u{1160}"]),
2557 ("\u{11A8}\u{11A8}", &["\u{11A8}\u{11A8}"]),
2558 ("\u{11A8}\u{308}\u{11A8}", &["\u{11A8}\u{308}", "\u{11A8}"]),
2559 ("\u{11A8}\u{AC00}", &["\u{11A8}", "\u{AC00}"]),
2560 ("\u{11A8}\u{308}\u{AC00}", &["\u{11A8}\u{308}", "\u{AC00}"]),
2561 ("\u{11A8}\u{AC01}", &["\u{11A8}", "\u{AC01}"]),
2562 ("\u{11A8}\u{308}\u{AC01}", &["\u{11A8}\u{308}", "\u{AC01}"]),
2563 ("\u{11A8}\u{1F1E6}", &["\u{11A8}", "\u{1F1E6}"]),
2564 ("\u{11A8}\u{308}\u{1F1E6}", &["\u{11A8}\u{308}", "\u{1F1E6}"]),
2565 ("\u{11A8}\u{378}", &["\u{11A8}", "\u{378}"]),
2566 ("\u{11A8}\u{308}\u{378}", &["\u{11A8}\u{308}", "\u{378}"]),
2567 ("\u{AC00}\u{20}", &["\u{AC00}", "\u{20}"]),
2568 ("\u{AC00}\u{308}\u{20}", &["\u{AC00}\u{308}", "\u{20}"]),
2569 ("\u{AC00}\u{D}", &["\u{AC00}", "\u{D}"]),
2570 ("\u{AC00}\u{308}\u{D}", &["\u{AC00}\u{308}", "\u{D}"]),
2571 ("\u{AC00}\u{A}", &["\u{AC00}", "\u{A}"]),
2572 ("\u{AC00}\u{308}\u{A}", &["\u{AC00}\u{308}", "\u{A}"]),
2573 ("\u{AC00}\u{1}", &["\u{AC00}", "\u{1}"]),
2574 ("\u{AC00}\u{308}\u{1}", &["\u{AC00}\u{308}", "\u{1}"]),
2575 ("\u{AC00}\u{300}", &["\u{AC00}\u{300}"]),
2576 ("\u{AC00}\u{308}\u{300}", &["\u{AC00}\u{308}\u{300}"]),
2577 ("\u{AC00}\u{1100}", &["\u{AC00}", "\u{1100}"]),
2578 ("\u{AC00}\u{308}\u{1100}", &["\u{AC00}\u{308}", "\u{1100}"]),
2579 ("\u{AC00}\u{1160}", &["\u{AC00}\u{1160}"]),
2580 ("\u{AC00}\u{308}\u{1160}", &["\u{AC00}\u{308}", "\u{1160}"]),
2581 ("\u{AC00}\u{11A8}", &["\u{AC00}\u{11A8}"]),
2582 ("\u{AC00}\u{308}\u{11A8}", &["\u{AC00}\u{308}", "\u{11A8}"]),
2583 ("\u{AC00}\u{AC00}", &["\u{AC00}", "\u{AC00}"]),
2584 ("\u{AC00}\u{308}\u{AC00}", &["\u{AC00}\u{308}", "\u{AC00}"]),
2585 ("\u{AC00}\u{AC01}", &["\u{AC00}", "\u{AC01}"]),
2586 ("\u{AC00}\u{308}\u{AC01}", &["\u{AC00}\u{308}", "\u{AC01}"]),
2587 ("\u{AC00}\u{1F1E6}", &["\u{AC00}", "\u{1F1E6}"]),
2588 ("\u{AC00}\u{308}\u{1F1E6}", &["\u{AC00}\u{308}", "\u{1F1E6}"]),
2589 ("\u{AC00}\u{378}", &["\u{AC00}", "\u{378}"]),
2590 ("\u{AC00}\u{308}\u{378}", &["\u{AC00}\u{308}", "\u{378}"]),
2591 ("\u{AC01}\u{20}", &["\u{AC01}", "\u{20}"]),
2592 ("\u{AC01}\u{308}\u{20}", &["\u{AC01}\u{308}", "\u{20}"]),
2593 ("\u{AC01}\u{D}", &["\u{AC01}", "\u{D}"]),
2594 ("\u{AC01}\u{308}\u{D}", &["\u{AC01}\u{308}", "\u{D}"]),
2595 ("\u{AC01}\u{A}", &["\u{AC01}", "\u{A}"]),
2596 ("\u{AC01}\u{308}\u{A}", &["\u{AC01}\u{308}", "\u{A}"]),
2597 ("\u{AC01}\u{1}", &["\u{AC01}", "\u{1}"]),
2598 ("\u{AC01}\u{308}\u{1}", &["\u{AC01}\u{308}", "\u{1}"]),
2599 ("\u{AC01}\u{300}", &["\u{AC01}\u{300}"]),
2600 ("\u{AC01}\u{308}\u{300}", &["\u{AC01}\u{308}\u{300}"]),
2601 ("\u{AC01}\u{1100}", &["\u{AC01}", "\u{1100}"]),
2602 ("\u{AC01}\u{308}\u{1100}", &["\u{AC01}\u{308}", "\u{1100}"]),
2603 ("\u{AC01}\u{1160}", &["\u{AC01}", "\u{1160}"]),
2604 ("\u{AC01}\u{308}\u{1160}", &["\u{AC01}\u{308}", "\u{1160}"]),
2605 ("\u{AC01}\u{11A8}", &["\u{AC01}\u{11A8}"]),
2606 ("\u{AC01}\u{308}\u{11A8}", &["\u{AC01}\u{308}", "\u{11A8}"]),
2607 ("\u{AC01}\u{AC00}", &["\u{AC01}", "\u{AC00}"]),
2608 ("\u{AC01}\u{308}\u{AC00}", &["\u{AC01}\u{308}", "\u{AC00}"]),
2609 ("\u{AC01}\u{AC01}", &["\u{AC01}", "\u{AC01}"]),
2610 ("\u{AC01}\u{308}\u{AC01}", &["\u{AC01}\u{308}", "\u{AC01}"]),
2611 ("\u{AC01}\u{1F1E6}", &["\u{AC01}", "\u{1F1E6}"]),
2612 ("\u{AC01}\u{308}\u{1F1E6}", &["\u{AC01}\u{308}", "\u{1F1E6}"]),
2613 ("\u{AC01}\u{378}", &["\u{AC01}", "\u{378}"]),
2614 ("\u{AC01}\u{308}\u{378}", &["\u{AC01}\u{308}", "\u{378}"]),
2615 ("\u{1F1E6}\u{20}", &["\u{1F1E6}", "\u{20}"]),
2616 ("\u{1F1E6}\u{308}\u{20}", &["\u{1F1E6}\u{308}", "\u{20}"]),
2617 ("\u{1F1E6}\u{D}", &["\u{1F1E6}", "\u{D}"]),
2618 ("\u{1F1E6}\u{308}\u{D}", &["\u{1F1E6}\u{308}", "\u{D}"]),
2619 ("\u{1F1E6}\u{A}", &["\u{1F1E6}", "\u{A}"]),
2620 ("\u{1F1E6}\u{308}\u{A}", &["\u{1F1E6}\u{308}", "\u{A}"]),
2621 ("\u{1F1E6}\u{1}", &["\u{1F1E6}", "\u{1}"]),
2622 ("\u{1F1E6}\u{308}\u{1}", &["\u{1F1E6}\u{308}", "\u{1}"]),
2623 ("\u{1F1E6}\u{300}", &["\u{1F1E6}\u{300}"]),
2624 ("\u{1F1E6}\u{308}\u{300}", &["\u{1F1E6}\u{308}\u{300}"]),
2625 ("\u{1F1E6}\u{1100}", &["\u{1F1E6}", "\u{1100}"]),
2626 ("\u{1F1E6}\u{308}\u{1100}", &["\u{1F1E6}\u{308}", "\u{1100}"]),
2627 ("\u{1F1E6}\u{1160}", &["\u{1F1E6}", "\u{1160}"]),
2628 ("\u{1F1E6}\u{308}\u{1160}", &["\u{1F1E6}\u{308}", "\u{1160}"]),
2629 ("\u{1F1E6}\u{11A8}", &["\u{1F1E6}", "\u{11A8}"]),
2630 ("\u{1F1E6}\u{308}\u{11A8}", &["\u{1F1E6}\u{308}", "\u{11A8}"]),
2631 ("\u{1F1E6}\u{AC00}", &["\u{1F1E6}", "\u{AC00}"]),
2632 ("\u{1F1E6}\u{308}\u{AC00}", &["\u{1F1E6}\u{308}", "\u{AC00}"]),
2633 ("\u{1F1E6}\u{AC01}", &["\u{1F1E6}", "\u{AC01}"]),
2634 ("\u{1F1E6}\u{308}\u{AC01}", &["\u{1F1E6}\u{308}", "\u{AC01}"]),
2635 ("\u{1F1E6}\u{1F1E6}", &["\u{1F1E6}\u{1F1E6}"]),
2636 ("\u{1F1E6}\u{308}\u{1F1E6}", &["\u{1F1E6}\u{308}", "\u{1F1E6}"]),
2637 ("\u{1F1E6}\u{378}", &["\u{1F1E6}", "\u{378}"]),
2638 ("\u{1F1E6}\u{308}\u{378}", &["\u{1F1E6}\u{308}", "\u{378}"]),
2639 ("\u{378}\u{20}", &["\u{378}", "\u{20}"]),
2640 ("\u{378}\u{308}\u{20}", &["\u{378}\u{308}", "\u{20}"]),
2641 ("\u{378}\u{D}", &["\u{378}", "\u{D}"]),
2642 ("\u{378}\u{308}\u{D}", &["\u{378}\u{308}", "\u{D}"]),
2643 ("\u{378}\u{A}", &["\u{378}", "\u{A}"]),
2644 ("\u{378}\u{308}\u{A}", &["\u{378}\u{308}", "\u{A}"]),
2645 ("\u{378}\u{1}", &["\u{378}", "\u{1}"]),
2646 ("\u{378}\u{308}\u{1}", &["\u{378}\u{308}", "\u{1}"]),
2647 ("\u{378}\u{300}", &["\u{378}\u{300}"]),
2648 ("\u{378}\u{308}\u{300}", &["\u{378}\u{308}\u{300}"]),
2649 ("\u{378}\u{1100}", &["\u{378}", "\u{1100}"]),
2650 ("\u{378}\u{308}\u{1100}", &["\u{378}\u{308}", "\u{1100}"]),
2651 ("\u{378}\u{1160}", &["\u{378}", "\u{1160}"]),
2652 ("\u{378}\u{308}\u{1160}", &["\u{378}\u{308}", "\u{1160}"]),
2653 ("\u{378}\u{11A8}", &["\u{378}", "\u{11A8}"]),
2654 ("\u{378}\u{308}\u{11A8}", &["\u{378}\u{308}", "\u{11A8}"]),
2655 ("\u{378}\u{AC00}", &["\u{378}", "\u{AC00}"]),
2656 ("\u{378}\u{308}\u{AC00}", &["\u{378}\u{308}", "\u{AC00}"]),
2657 ("\u{378}\u{AC01}", &["\u{378}", "\u{AC01}"]),
2658 ("\u{378}\u{308}\u{AC01}", &["\u{378}\u{308}", "\u{AC01}"]),
2659 ("\u{378}\u{1F1E6}", &["\u{378}", "\u{1F1E6}"]),
2660 ("\u{378}\u{308}\u{1F1E6}", &["\u{378}\u{308}", "\u{1F1E6}"]),
2661 ("\u{378}\u{378}", &["\u{378}", "\u{378}"]),
2662 ("\u{378}\u{308}\u{378}", &["\u{378}\u{308}", "\u{378}"]),
2663 ("\u{61}\u{1F1E6}\u{62}", &["\u{61}", "\u{1F1E6}", "\u{62}"]),
2664 ("\u{1F1F7}\u{1F1FA}", &["\u{1F1F7}\u{1F1FA}"]),
2665 ("\u{1F1F7}\u{1F1FA}\u{1F1F8}", &["\u{1F1F7}\u{1F1FA}\u{1F1F8}"]),
2666 ("\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}",
2667 &["\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}"]),
2668 ("\u{1F1F7}\u{1F1FA}\u{200B}\u{1F1F8}\u{1F1EA}",
2669 &["\u{1F1F7}\u{1F1FA}", "\u{200B}", "\u{1F1F8}\u{1F1EA}"]),
2670 ("\u{1F1E6}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{1F1E7}\u{1F1E8}"]),
2671 ("\u{1F1E6}\u{200D}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{200D}",
2672 "\u{1F1E7}\u{1F1E8}"]),
2673 ("\u{1F1E6}\u{1F1E7}\u{200D}\u{1F1E8}",
2674 &["\u{1F1E6}\u{1F1E7}\u{200D}", "\u{1F1E8}"]),
2675 ("\u{20}\u{200D}\u{646}", &["\u{20}\u{200D}", "\u{646}"]),
2676 ("\u{646}\u{200D}\u{20}", &["\u{646}\u{200D}", "\u{20}"]),
2679 let test_diff: [(_, &[_], &[_]); 23] = [
2680 ("\u{20}\u{903}", &["\u{20}\u{903}"], &["\u{20}", "\u{903}"]), ("\u{20}\u{308}\u{903}",
2681 &["\u{20}\u{308}\u{903}"], &["\u{20}\u{308}", "\u{903}"]), ("\u{D}\u{308}\u{903}",
2682 &["\u{D}", "\u{308}\u{903}"], &["\u{D}", "\u{308}", "\u{903}"]), ("\u{A}\u{308}\u{903}",
2683 &["\u{A}", "\u{308}\u{903}"], &["\u{A}", "\u{308}", "\u{903}"]), ("\u{1}\u{308}\u{903}",
2684 &["\u{1}", "\u{308}\u{903}"], &["\u{1}", "\u{308}", "\u{903}"]), ("\u{300}\u{903}",
2685 &["\u{300}\u{903}"], &["\u{300}", "\u{903}"]), ("\u{300}\u{308}\u{903}",
2686 &["\u{300}\u{308}\u{903}"], &["\u{300}\u{308}", "\u{903}"]), ("\u{903}\u{903}",
2687 &["\u{903}\u{903}"], &["\u{903}", "\u{903}"]), ("\u{903}\u{308}\u{903}",
2688 &["\u{903}\u{308}\u{903}"], &["\u{903}\u{308}", "\u{903}"]), ("\u{1100}\u{903}",
2689 &["\u{1100}\u{903}"], &["\u{1100}", "\u{903}"]), ("\u{1100}\u{308}\u{903}",
2690 &["\u{1100}\u{308}\u{903}"], &["\u{1100}\u{308}", "\u{903}"]), ("\u{1160}\u{903}",
2691 &["\u{1160}\u{903}"], &["\u{1160}", "\u{903}"]), ("\u{1160}\u{308}\u{903}",
2692 &["\u{1160}\u{308}\u{903}"], &["\u{1160}\u{308}", "\u{903}"]), ("\u{11A8}\u{903}",
2693 &["\u{11A8}\u{903}"], &["\u{11A8}", "\u{903}"]), ("\u{11A8}\u{308}\u{903}",
2694 &["\u{11A8}\u{308}\u{903}"], &["\u{11A8}\u{308}", "\u{903}"]), ("\u{AC00}\u{903}",
2695 &["\u{AC00}\u{903}"], &["\u{AC00}", "\u{903}"]), ("\u{AC00}\u{308}\u{903}",
2696 &["\u{AC00}\u{308}\u{903}"], &["\u{AC00}\u{308}", "\u{903}"]), ("\u{AC01}\u{903}",
2697 &["\u{AC01}\u{903}"], &["\u{AC01}", "\u{903}"]), ("\u{AC01}\u{308}\u{903}",
2698 &["\u{AC01}\u{308}\u{903}"], &["\u{AC01}\u{308}", "\u{903}"]), ("\u{1F1E6}\u{903}",
2699 &["\u{1F1E6}\u{903}"], &["\u{1F1E6}", "\u{903}"]), ("\u{1F1E6}\u{308}\u{903}",
2700 &["\u{1F1E6}\u{308}\u{903}"], &["\u{1F1E6}\u{308}", "\u{903}"]), ("\u{378}\u{903}",
2701 &["\u{378}\u{903}"], &["\u{378}", "\u{903}"]), ("\u{378}\u{308}\u{903}",
2702 &["\u{378}\u{308}\u{903}"], &["\u{378}\u{308}", "\u{903}"]),
2705 for &(s, g) in test_same.iter() {
2706 // test forward iterator
2707 assert!(order::equals(s.graphemes(true), g.iter().map(|&x| x)));
2708 assert!(order::equals(s.graphemes(false), g.iter().map(|&x| x)));
2710 // test reverse iterator
2711 assert!(order::equals(s.graphemes(true).rev(), g.iter().rev().map(|&x| x)));
2712 assert!(order::equals(s.graphemes(false).rev(), g.iter().rev().map(|&x| x)));
2715 for &(s, gt, gf) in test_diff.iter() {
2716 // test forward iterator
2717 assert!(order::equals(s.graphemes(true), gt.iter().map(|&x| x)));
2718 assert!(order::equals(s.graphemes(false), gf.iter().map(|&x| x)));
2720 // test reverse iterator
2721 assert!(order::equals(s.graphemes(true).rev(), gt.iter().rev().map(|&x| x)));
2722 assert!(order::equals(s.graphemes(false).rev(), gf.iter().rev().map(|&x| x)));
2725 // test the indices iterators
2726 let s = "a̐éö̲\r\n";
2727 let gr_inds = s.grapheme_indices(true).collect::<Vec<(uint, &str)>>();
2728 let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
2729 assert_eq!(gr_inds, b);
2730 let gr_inds = s.grapheme_indices(true).rev().collect::<Vec<(uint, &str)>>();
2731 let b: &[_] = &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0u, "a̐")];
2732 assert_eq!(gr_inds, b);
2733 let mut gr_inds_iter = s.grapheme_indices(true);
2735 let gr_inds = gr_inds_iter.by_ref();
2736 let e1 = gr_inds.size_hint();
2737 assert_eq!(e1, (1, Some(13)));
2738 let c = gr_inds.count();
2741 let e2 = gr_inds_iter.size_hint();
2742 assert_eq!(e2, (0, Some(0)));
2744 // make sure the reverse iterator does the right thing with "\n" at beginning of string
2746 let gr = s.graphemes(true).rev().collect::<Vec<&str>>();
2747 let b: &[_] = &["\r", "\r\n", "\n"];
// Exercises `split_str` with substring separators: separators that never
// match, separators at the start/end of the input, multi-byte separators and
// inputs made of repeated separator characters.
2752 fn test_split_strator() {
// Helper: splits `s` on the substring `sep` and collects the pieces into `v`.
// NOTE(review): `u` carries the expected pieces; the comparison of `v`
// against `u` is not visible in this view — confirm it is present.
2753 fn t(s: &str, sep: &str, u: &[&str]) {
2754 let v: Vec<&str> = s.split_str(sep).collect();
// "12345" never occurs contiguously, so the whole input is the only piece.
2757 t("--1233345--", "12345", &["--1233345--"]);
// A separator at either end of the input is expected to yield an empty piece
// on that side.
2758 t("abc::hello::there", "::", &["abc", "hello", "there"]);
2759 t("::hello::there", "::", &["", "hello", "there"]);
2760 t("hello::there::", "::", &["hello", "there", ""]);
2761 t("::hello::there::", "::", &["", "hello", "there", ""]);
// Multi-byte separator inside multi-byte text.
2762 t("ประเทศไทย中华Việt Nam", "中华", &["ประเทศไทย", "Việt Nam"]);
2763 t("zzXXXzzYYYzz", "zz", &["", "XXX", "YYY", ""]);
2764 t("zzXXXzYYYz", "XXX", &["zz", "zYYYz"]);
2765 t(".XXX.YYY.", ".", &["", "XXX", "YYY", ""]);
// Inputs built from the separator's own character: the expected pieces show
// matches being consumed left to right without overlapping.
2767 t("zz", "zz", &["",""]);
2768 t("ok", "z", &["ok"]);
2769 t("zzz", "zz", &["","z"]);
2770 t("zzzzz", "zz", &["","","z"]);
// Checks that the `Default` value of a string-like type views as the empty
// string.
2774 fn test_str_default() {
2775 use core::default::Default;
// Generic helper: builds the default `S` and asserts that its `as_slice()`
// is "".
// NOTE(review): the concrete types this helper is instantiated with are not
// visible in this view.
2776 fn t<S: Default + Str>() {
2777 let s: S = Default::default();
2778 assert_eq!(s.as_slice(), "");
// Checks `len()` on string slices taken from literals and from `String`s by
// summing the lengths of several slices.
2786 fn test_str_container() {
// Helper: total byte length of all slices in `v`.
2787 fn sum_len(v: &[&str]) -> uint {
2788 v.iter().map(|x| x.len()).sum()
2791 let s = String::from_str("01234");
// Each group of slices below adds up to five bytes; empty slices add nothing.
2792 assert_eq!(5, sum_len(&["012", "", "34"]));
2793 assert_eq!(5, sum_len(&[String::from_str("01").as_slice(),
2794 String::from_str("2").as_slice(),
2795 String::from_str("34").as_slice(),
2796 String::from_str("").as_slice()]));
2797 assert_eq!(5, sum_len(&[s.as_slice()]));
// Checks `from_utf8` on an ASCII input, on multi-byte text, and on a byte
// string that ends in an invalid byte.
2801 fn test_str_from_utf8() {
// NOTE(review): the `xs` binding used by this first case is not visible in
// this view — presumably the bytes of "hello"; confirm.
2803 assert_eq!(from_utf8(xs), Ok("hello"));
// Multi-byte text must round-trip through its own byte representation.
2805 let xs = "ศไทย中华Việt Nam".as_bytes();
2806 assert_eq!(from_utf8(xs), Ok("ศไทย中华Việt Nam"));
// A trailing 0xFF byte must be rejected; the error expected here is
// `Utf8Error::TooShort`.
2808 let xs = b"hello\xFF";
2809 assert_eq!(from_utf8(xs), Err(Utf8Error::TooShort));
2816 use prelude::{SliceExt, IteratorExt, SliceConcatExt};
2818 use test::black_box;
// Benchmark: counts the chars of a string that mixes multi-byte and ASCII
// characters using the forward `chars()` iterator.
2821 fn char_iterator(b: &mut Bencher) {
2822 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2824 b.iter(|| s.chars().count());
// Benchmark: walks the chars of the mixed multi-byte/ASCII string with an
// explicit `for` loop instead of `count()`.
2828 fn char_iterator_for(b: &mut Bencher) {
2829 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Each char is handed to `black_box` so the loop body has an observable use.
2832 for ch in s.chars() { black_box(ch) }
// Benchmark: counts the chars of a multi-line, ASCII-only string using the
// forward `chars()` iterator.
2837 fn char_iterator_ascii(b: &mut Bencher) {
2838 let s = "Mary had a little lamb, Little lamb
2839 Mary had a little lamb, Little lamb
2840 Mary had a little lamb, Little lamb
2841 Mary had a little lamb, Little lamb
2842 Mary had a little lamb, Little lamb
2843 Mary had a little lamb, Little lamb";
2845 b.iter(|| s.chars().count());
// Benchmark: counts the chars of the mixed multi-byte/ASCII string while
// iterating from the back (`chars().rev()`).
2849 fn char_iterator_rev(b: &mut Bencher) {
2850 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2852 b.iter(|| s.chars().rev().count());
// Benchmark: walks the chars of the mixed multi-byte/ASCII string back to
// front with an explicit `for` loop.
2856 fn char_iterator_rev_for(b: &mut Bencher) {
2857 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Each char is handed to `black_box` so the loop body has an observable use.
2860 for ch in s.chars().rev() { black_box(ch) }
// Benchmark: iterates `char_indices()` over the mixed multi-byte/ASCII
// string and checks that it yields as many items as `chars()`.
2865 fn char_indicesator(b: &mut Bencher) {
2866 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Reference count, computed once outside the timed closure.
2867 let len = s.chars().count();
2869 b.iter(|| assert_eq!(s.char_indices().count(), len));
// Benchmark: iterates `char_indices()` in reverse over the mixed
// multi-byte/ASCII string and checks that it yields as many items as
// `chars()`.
2873 fn char_indicesator_rev(b: &mut Bencher) {
2874 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Reference count, computed once outside the timed closure.
2875 let len = s.chars().count();
2877 b.iter(|| assert_eq!(s.char_indices().rev().count(), len));
// Benchmark: splits mostly multi-byte text on the ASCII char 'V'. The text
// contains 'V' twice, hence the three expected pieces.
2881 fn split_unicode_ascii(b: &mut Bencher) {
2882 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2884 b.iter(|| assert_eq!(s.split('V').count(), 3));
// Benchmark: splits mostly multi-byte text using a custom `CharEq` matcher
// that wraps 'V' but declares itself not ASCII-only.
2888 fn split_unicode_not_ascii(b: &mut Bencher) {
// Wrapper around a single char with its own `CharEq` implementation.
2889 struct NotAscii(char);
2890 impl CharEq for NotAscii {
// NOTE(review): only the destructuring of the wrapped char is visible here;
// the comparison against `c` is not shown — confirm.
2891 fn matches(&mut self, c: char) -> bool {
2892 let NotAscii(cc) = *self;
// Always reports `false`, presumably to keep `split` off any ASCII-specific
// path — confirm against the `CharEq` consumers.
2895 fn only_ascii(&self) -> bool { false }
2897 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2899 b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
// Benchmark: splits an ASCII sentence on the space character.
2904 fn split_ascii(b: &mut Bencher) {
2905 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Expected piece count, computed once outside the timed closure.
2906 let len = s.split(' ').count();
2908 b.iter(|| assert_eq!(s.split(' ').count(), len));
// Benchmark: splits an ASCII sentence on spaces using a custom `CharEq`
// matcher that declares itself not ASCII-only.
2912 fn split_not_ascii(b: &mut Bencher) {
// Wrapper around a single char with its own `CharEq` implementation.
2913 struct NotAscii(char);
2914 impl CharEq for NotAscii {
// NOTE(review): only the destructuring of the wrapped char is visible here;
// the comparison against `c` is not shown — confirm.
2916 fn matches(&mut self, c: char) -> bool {
2917 let NotAscii(cc) = *self;
// Always reports `false`, presumably to keep `split` off any ASCII-specific
// path — confirm against the `CharEq` consumers.
2920 fn only_ascii(&self) -> bool { false }
2922 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Expected piece count comes from the plain `char` split of the same text.
2923 let len = s.split(' ').count();
2925 b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
// Benchmark: splits an ASCII sentence on spaces using a named function as
// the predicate.
2929 fn split_extern_fn(b: &mut Bencher) {
2930 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Expected piece count comes from the plain `char` split of the same text.
2931 let len = s.split(' ').count();
// Predicate function: true only for the space character.
2932 fn pred(c: char) -> bool { c == ' ' }
2934 b.iter(|| assert_eq!(s.split(pred).count(), len));
// Benchmark: splits an ASCII sentence on spaces using a closure as the
// predicate.
2938 fn split_closure(b: &mut Bencher) {
2939 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Expected piece count comes from the plain `char` split of the same text.
2940 let len = s.split(' ').count();
2942 b.iter(|| assert_eq!(s.split(|&: c: char| c == ' ').count(), len));
// Benchmark: splits an ASCII sentence on spaces using a one-element
// `&[char]` slice as the separator set.
2946 fn split_slice(b: &mut Bencher) {
2947 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Expected piece count comes from the plain `char` split of the same text.
2948 let len = s.split(' ').count();
2950 let c: &[char] = &[' '];
2951 b.iter(|| assert_eq!(s.split(c).count(), len));
// Benchmark: joins ten copies of a mixed multi-byte/ASCII string with
// `connect` and checks the byte length of the result.
// NOTE(review): the definition of `sep` is not visible in this view.
2955 fn bench_connect(b: &mut Bencher) {
2956 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2958 let v = vec![s, s, s, s, s, s, s, s, s, s];
// Ten pieces joined by nine separators.
2960 assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
// Benchmark: `contains` on a one-sentence haystack, asserting that the
// needle is found.
// NOTE(review): the definition of `needle` is not visible in this view.
2965 fn bench_contains_short_short(b: &mut Bencher) {
2966 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2970 assert!(haystack.contains(needle));
// Benchmark: `contains` with the short needle "english" against a
// multi-paragraph lorem-ipsum haystack, asserting that the needle is absent.
// NOTE(review): the line that opens the `haystack` string literal is not
// visible in this view.
2975 fn bench_contains_short_long(b: &mut Bencher) {
2977 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
2978 ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
2979 eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
2980 sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
2981 tempus vel, gravida nec quam.
2983 In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
2984 sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
2985 diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
2986 lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
2987 eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
2988 interdum. Curabitur ut nisi justo.
2990 Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
2991 mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
2992 lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
2993 est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
2994 felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
2995 ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
2996 feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
2997 Aliquam sit amet placerat lorem.
2999 Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
3000 mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
3001 Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
3002 lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
3003 suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
3006 Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
3007 feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
3008 vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
3009 leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
3010 malesuada sollicitudin quam eu fermentum.";
3011 let needle = "english";
3014 assert!(!haystack.contains(needle));
// Benchmark: `contains` where the haystack is a long run of `a` and the
// needle is a shorter run of `a` ending in `b`. Every alignment matches the
// needle's leading `a`s before failing on the final `b`, and the needle is
// never found.
3019 fn bench_contains_bad_naive(b: &mut Bencher) {
3020 let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
3021 let needle = "aaaaaaaab";
3024 assert!(!haystack.contains(needle));
// Benchmark: `contains` where the needle is the same text as the haystack,
// so the only possible match is the whole string.
3029 fn bench_contains_equal(b: &mut Bencher) {
3030 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3031 let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3034 assert!(haystack.contains(needle));