1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 // ignore-lexer-test FIXME #15679
13 //! Unicode string manipulation (`str` type)
17 //! Rust's string type is one of the core primitive types of the language. While
18 //! represented by the name `str`, the name `str` is not actually a valid type in
19 //! Rust. Each string must also be decorated with a pointer. `String` is used
20 //! for an owned string, so there is only one commonly-used `str` type in Rust:
23 //! `&str` is the borrowed string type. This type of string can only be created
24 //! from other strings, unless it is a static string (see below). As the word
25 //! "borrowed" implies, this type of string is owned elsewhere, and this string
26 //! cannot be moved out of.
28 //! As an example, here's some code that uses a string.
32 //! let borrowed_string = "This string is borrowed with the 'static lifetime";
36 //! From the example above, you can guess that Rust's string literals have the
37 //! `'static` lifetime. This is akin to C's concept of a static string.
38 //! More precisely, string literals are immutable views with a 'static lifetime
39 //! (otherwise known as the lifetime of the entire program), and thus have the
40 //! type `&'static str`.
44 //! Rust's string type, `str`, is a sequence of Unicode scalar values encoded as a
45 //! stream of UTF-8 bytes. All [strings](../../reference.html#literals) are
46 //! guaranteed to be validly encoded UTF-8 sequences. Additionally, strings are
47 //! not null-terminated and can thus contain null bytes.
49 //! The actual representation of strings has a direct mapping to slices: `&str`
50 //! is the same as `&[u8]`.
52 #![doc(primitive = "str")]
54 use self::RecompositionState::*;
55 use self::DecompositionType::*;
57 use core::borrow::{BorrowFrom, ToOwned};
58 use core::char::CharExt;
59 use core::clone::Clone;
60 use core::iter::AdditiveIterator;
61 use core::iter::{range, Iterator, IteratorExt};
62 use core::kinds::Sized;
64 use core::option::Option::{self, Some, None};
65 use core::slice::AsSlice;
66 use core::str as core_str;
67 use unicode::str::{UnicodeStr, Utf16Encoder};
69 use ring_buf::RingBuf;
74 use slice::SliceConcatExt;
76 pub use core::str::{FromStr, Utf8Error, Str};
77 pub use core::str::{Lines, LinesAny, MatchIndices, SplitStr, CharRange};
78 pub use core::str::{Split, SplitTerminator};
79 pub use core::str::{SplitN, RSplitN};
80 pub use core::str::{from_utf8, CharEq, Chars, CharIndices, Bytes};
81 pub use core::str::{from_utf8_unchecked, from_c_str};
82 pub use unicode::str::{Words, Graphemes, GraphemeIndices};
85 Section: Creating a string
88 impl<S: Str> SliceConcatExt<str, String> for [S] {
89 fn concat(&self) -> String {
90 let s = self.as_slice();
96 // `len` calculation may overflow but push_str will check boundaries
97 let len = s.iter().map(|s| s.as_slice().len()).sum();
98 let mut result = String::with_capacity(len);
101 result.push_str(s.as_slice())
107 fn connect(&self, sep: &str) -> String {
108 let s = self.as_slice();
111 return String::new();
119 // this is wrong without the guarantee that `self` is non-empty
120 // `len` calculation may overflow but push_str will check boundaries
121 let len = sep.len() * (s.len() - 1)
122 + s.iter().map(|s| s.as_slice().len()).sum();
123 let mut result = String::with_capacity(len);
124 let mut first = true;
130 result.push_str(sep);
132 result.push_str(s.as_slice());
142 // Helper functions used for Unicode normalization
143 fn canonical_sort(comb: &mut [(char, u8)]) {
144 let len = comb.len();
145 for i in range(0, len) {
146 let mut swapped = false;
147 for j in range(1, len-i) {
148 let class_a = comb[j-1].1;
149 let class_b = comb[j].1;
150 if class_a != 0 && class_b != 0 && class_a > class_b {
155 if !swapped { break; }
160 enum DecompositionType {
165 /// External iterator for a string's decomposition's characters.
166 /// Use with the `std::iter` module.
168 pub struct Decompositions<'a> {
169 kind: DecompositionType,
171 buffer: Vec<(char, u8)>,
175 impl<'a> Iterator for Decompositions<'a> {
179 fn next(&mut self) -> Option<char> {
180 match self.buffer.first() {
183 self.buffer.remove(0);
186 Some(&(c, _)) if self.sorted => {
187 self.buffer.remove(0);
190 _ => self.sorted = false
194 for ch in self.iter {
195 let buffer = &mut self.buffer;
196 let sorted = &mut self.sorted;
198 let callback = |&mut: d| {
200 unicode::char::canonical_combining_class(d);
201 if class == 0 && !*sorted {
202 canonical_sort(buffer.as_mut_slice());
205 buffer.push((d, class));
209 unicode::char::decompose_canonical(ch, callback)
212 unicode::char::decompose_compatible(ch, callback)
223 canonical_sort(self.buffer.as_mut_slice());
227 if self.buffer.is_empty() {
230 match self.buffer.remove(0) {
240 fn size_hint(&self) -> (uint, Option<uint>) {
241 let (lower, _) = self.iter.size_hint();
247 enum RecompositionState {
253 /// External iterator for a string's recomposition's characters.
254 /// Use with the `std::iter` module.
256 pub struct Recompositions<'a> {
257 iter: Decompositions<'a>,
258 state: RecompositionState,
259 buffer: RingBuf<char>,
260 composee: Option<char>,
264 impl<'a> Iterator for Recompositions<'a> {
268 fn next(&mut self) -> Option<char> {
272 for ch in self.iter {
273 let ch_class = unicode::char::canonical_combining_class(ch);
274 if self.composee.is_none() {
278 self.composee = Some(ch);
281 let k = self.composee.clone().unwrap();
283 match self.last_ccc {
285 match unicode::char::compose(k, ch) {
287 self.composee = Some(r);
292 self.composee = Some(ch);
295 self.buffer.push_back(ch);
296 self.last_ccc = Some(ch_class);
301 if l_class >= ch_class {
302 // `ch` is blocked from `composee`
304 self.composee = Some(ch);
305 self.last_ccc = None;
306 self.state = Purging;
309 self.buffer.push_back(ch);
310 self.last_ccc = Some(ch_class);
313 match unicode::char::compose(k, ch) {
315 self.composee = Some(r);
319 self.buffer.push_back(ch);
320 self.last_ccc = Some(ch_class);
326 self.state = Finished;
327 if self.composee.is_some() {
328 return self.composee.take();
332 match self.buffer.pop_front() {
333 None => self.state = Composing,
338 match self.buffer.pop_front() {
339 None => return self.composee.take(),
348 /// External iterator for a string's UTF-16 code units.
349 /// Use with the `std::iter` module.
351 pub struct Utf16Units<'a> {
352 encoder: Utf16Encoder<Chars<'a>>
355 impl<'a> Iterator for Utf16Units<'a> {
/// Advances the iterator by forwarding to the wrapped `encoder` and returning its result as-is.
359 fn next(&mut self) -> Option<u16> { self.encoder.next() }
/// Reports the size hint of the wrapped `encoder` unchanged.
362 fn size_hint(&self) -> (uint, Option<uint>) { self.encoder.size_hint() }
369 // Return the initial codepoint accumulator for the first byte.
370 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
371 // for width 3, and 3 bits for width 4
372 macro_rules! utf8_first_byte {
373 ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
376 // return the value of $ch updated with continuation byte $byte
377 macro_rules! utf8_acc_cont_byte {
378 ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
381 #[unstable = "trait is unstable"]
382 impl BorrowFrom<String> for str {
/// Borrows `owned` as a `&str` by slicing it with the full-range form `owned[]`.
383 fn borrow_from(owned: &String) -> &str { owned[] }
386 #[unstable = "trait is unstable"]
387 impl ToOwned<String> for str {
388 fn to_owned(&self) -> String {
390 String::from_utf8_unchecked(self.as_bytes().to_owned())
400 Section: Trait implementations
403 /// Any string that can be represented as a slice.
404 pub trait StrExt for Sized?: ops::Slice<uint, str> {
405 /// Escapes each char in `s` with `char::escape_default`.
406 #[unstable = "return type may change to be an iterator"]
407 fn escape_default(&self) -> String {
408 self.chars().flat_map(|c| c.escape_default()).collect()
411 /// Escapes each char in `s` with `char::escape_unicode`.
412 #[unstable = "return type may change to be an iterator"]
413 fn escape_unicode(&self) -> String {
414 self.chars().flat_map(|c| c.escape_unicode()).collect()
417 /// Replaces all occurrences of one string with another.
421 /// * `from` - The string to replace
422 /// * `to` - The replacement string
426 /// The original string with all occurrences of `from` replaced with `to`.
431 /// let s = "Do you know the muffin man,
432 /// The muffin man, the muffin man, ...".to_string();
434 /// assert_eq!(s.replace("muffin man", "little lamb"),
435 /// "Do you know the little lamb,
436 /// The little lamb, the little lamb, ...".to_string());
438 /// // not found, so no change.
439 /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
442 fn replace(&self, from: &str, to: &str) -> String {
443 let mut result = String::new();
444 let mut last_end = 0;
445 for (start, end) in self.match_indices(from) {
446 result.push_str(unsafe { self.slice_unchecked(last_end, start) });
450 result.push_str(unsafe { self.slice_unchecked(last_end, self.len()) });
454 /// Returns an iterator over the string in Unicode Normalization Form D
455 /// (canonical decomposition).
457 #[unstable = "this functionality may be moved to libunicode"]
458 fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
460 iter: self[].chars(),
467 /// Returns an iterator over the string in Unicode Normalization Form KD
468 /// (compatibility decomposition).
470 #[unstable = "this functionality may be moved to libunicode"]
471 fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
473 iter: self[].chars(),
480 /// An Iterator over the string in Unicode Normalization Form C
481 /// (canonical decomposition followed by canonical composition).
483 #[unstable = "this functionality may be moved to libunicode"]
484 fn nfc_chars<'a>(&'a self) -> Recompositions<'a> {
486 iter: self.nfd_chars(),
488 buffer: RingBuf::new(),
494 /// An Iterator over the string in Unicode Normalization Form KC
495 /// (compatibility decomposition followed by canonical composition).
497 #[unstable = "this functionality may be moved to libunicode"]
498 fn nfkc_chars<'a>(&'a self) -> Recompositions<'a> {
500 iter: self.nfkd_chars(),
502 buffer: RingBuf::new(),
508 /// Returns true if a string contains a string pattern.
512 /// - pat - The string pattern to look for
517 /// assert!("bananas".contains("nana"));
520 fn contains(&self, pat: &str) -> bool {
521 core_str::StrExt::contains(self[], pat)
524 /// Returns true if a string contains a char pattern.
528 /// - pat - The char pattern to look for
533 /// assert!("hello".contains_char('e'));
535 #[unstable = "might get removed in favour of a more generic contains()"]
536 fn contains_char<P: CharEq>(&self, pat: P) -> bool {
537 core_str::StrExt::contains_char(self[], pat)
540 /// An iterator over the characters of `self`. Note, this iterates
541 /// over Unicode code-points, not Unicode graphemes.
546 /// let v: Vec<char> = "abc åäö".chars().collect();
547 /// assert_eq!(v, vec!['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
550 fn chars(&self) -> Chars {
551 core_str::StrExt::chars(self[])
554 /// An iterator over the bytes of `self`
559 /// let v: Vec<u8> = "bors".bytes().collect();
560 /// assert_eq!(v, b"bors".to_vec());
563 fn bytes(&self) -> Bytes {
564 core_str::StrExt::bytes(self[])
567 /// An iterator over the characters of `self` and their byte offsets.
569 fn char_indices(&self) -> CharIndices {
570 core_str::StrExt::char_indices(self[])
573 /// An iterator over substrings of `self`, separated by characters
574 /// matched by the pattern `pat`.
579 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect();
580 /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
582 /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).collect();
583 /// assert_eq!(v, vec!["abc", "def", "ghi"]);
585 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
586 /// assert_eq!(v, vec!["lion", "", "tiger", "leopard"]);
588 /// let v: Vec<&str> = "".split('X').collect();
589 /// assert_eq!(v, vec![""]);
592 fn split<P: CharEq>(&self, pat: P) -> Split<P> {
593 core_str::StrExt::split(self[], pat)
596 /// An iterator over substrings of `self`, separated by characters
597 /// matched by the pattern `pat`, restricted to splitting at most `count`
603 /// let v: Vec<&str> = "Mary had a little lambda".splitn(2, ' ').collect();
604 /// assert_eq!(v, vec!["Mary", "had", "a little lambda"]);
606 /// let v: Vec<&str> = "abc1def2ghi".splitn(1, |&: c: char| c.is_numeric()).collect();
607 /// assert_eq!(v, vec!["abc", "def2ghi"]);
609 /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(2, 'X').collect();
610 /// assert_eq!(v, vec!["lion", "", "tigerXleopard"]);
612 /// let v: Vec<&str> = "abcXdef".splitn(0, 'X').collect();
613 /// assert_eq!(v, vec!["abcXdef"]);
615 /// let v: Vec<&str> = "".splitn(1, 'X').collect();
616 /// assert_eq!(v, vec![""]);
619 fn splitn<P: CharEq>(&self, count: uint, pat: P) -> SplitN<P> {
620 core_str::StrExt::splitn(self[], count, pat)
623 /// An iterator over substrings of `self`, separated by characters
624 /// matched by the pattern `pat`.
626 /// Equivalent to `split`, except that the trailing substring
627 /// is skipped if empty (terminator semantics).
632 /// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
633 /// assert_eq!(v, vec!["A", "B"]);
635 /// let v: Vec<&str> = "A..B..".split_terminator('.').collect();
636 /// assert_eq!(v, vec!["A", "", "B", ""]);
638 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').rev().collect();
639 /// assert_eq!(v, vec!["lamb", "little", "a", "had", "Mary"]);
641 /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).rev().collect();
642 /// assert_eq!(v, vec!["ghi", "def", "abc"]);
644 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').rev().collect();
645 /// assert_eq!(v, vec!["leopard", "tiger", "", "lion"]);
647 #[unstable = "might get removed"]
648 fn split_terminator<P: CharEq>(&self, pat: P) -> SplitTerminator<P> {
649 core_str::StrExt::split_terminator(self[], pat)
652 /// An iterator over substrings of `self`, separated by characters
653 /// matched by the pattern `pat`, starting from the end of the string.
654 /// Restricted to splitting at most `count` times.
659 /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(2, ' ').collect();
660 /// assert_eq!(v, vec!["lamb", "little", "Mary had a"]);
662 /// let v: Vec<&str> = "abc1def2ghi".rsplitn(1, |&: c: char| c.is_numeric()).collect();
663 /// assert_eq!(v, vec!["ghi", "abc1def"]);
665 /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(2, 'X').collect();
666 /// assert_eq!(v, vec!["leopard", "tiger", "lionX"]);
669 fn rsplitn<P: CharEq>(&self, count: uint, pat: P) -> RSplitN<P> {
670 core_str::StrExt::rsplitn(self[], count, pat)
673 /// An iterator over the start and end indices of the disjoint
674 /// matches of the pattern `pat` within `self`.
676 /// That is, each returned value `(start, end)` satisfies
677 /// `self.slice(start, end) == pat`. For matches of `pat` within
678 /// `self` that overlap, only the indices corresponding to the
679 /// first match are returned.
684 /// let v: Vec<(uint, uint)> = "abcXXXabcYYYabc".match_indices("abc").collect();
685 /// assert_eq!(v, vec![(0,3), (6,9), (12,15)]);
687 /// let v: Vec<(uint, uint)> = "1abcabc2".match_indices("abc").collect();
688 /// assert_eq!(v, vec![(1,4), (4,7)]);
690 /// let v: Vec<(uint, uint)> = "ababa".match_indices("aba").collect();
691 /// assert_eq!(v, vec![(0, 3)]); // only the first `aba`
693 #[unstable = "might have its iterator type changed"]
694 fn match_indices<'a>(&'a self, pat: &'a str) -> MatchIndices<'a> {
695 core_str::StrExt::match_indices(self[], pat)
698 /// An iterator over the substrings of `self` separated by the pattern `pat`.
703 /// let v: Vec<&str> = "abcXXXabcYYYabc".split_str("abc").collect();
704 /// assert_eq!(v, vec!["", "XXX", "YYY", ""]);
706 /// let v: Vec<&str> = "1abcabc2".split_str("abc").collect();
707 /// assert_eq!(v, vec!["1", "", "2"]);
709 #[unstable = "might get removed in the future in favor of a more generic split()"]
710 fn split_str<'a>(&'a self, pat: &'a str) -> SplitStr<'a> {
711 core_str::StrExt::split_str(self[], pat)
714 /// An iterator over the lines of a string (subsequences separated
715 /// by `\n`). This does not include the empty string after a
721 /// let four_lines = "foo\nbar\n\nbaz\n";
722 /// let v: Vec<&str> = four_lines.lines().collect();
723 /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
726 fn lines(&self) -> Lines {
727 core_str::StrExt::lines(self[])
730 /// An iterator over the lines of a string, separated by either
731 /// `\n` or `\r\n`. As with `.lines()`, this does not include an
732 /// empty trailing line.
737 /// let four_lines = "foo\r\nbar\n\r\nbaz\n";
738 /// let v: Vec<&str> = four_lines.lines_any().collect();
739 /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
742 fn lines_any(&self) -> LinesAny {
743 core_str::StrExt::lines_any(self[])
746 /// Returns a slice of the given string from the byte range
747 /// [`begin`..`end`).
749 /// This operation is `O(1)`.
751 /// Panics when `begin` and `end` do not point to valid characters
752 /// or point beyond the last character of the string.
754 /// See also `slice_to` and `slice_from` for slicing prefixes and
755 /// suffixes of strings, and `slice_chars` for slicing based on
756 /// code point counts.
761 /// let s = "Löwe 老虎 Léopard";
762 /// assert_eq!(s.slice(0, 1), "L");
764 /// assert_eq!(s.slice(1, 9), "öwe 老");
766 /// // these will panic:
767 /// // byte 2 lies within `ö`:
768 /// // s.slice(2, 3);
770 /// // byte 8 lies within `老`
771 /// // s.slice(1, 8);
773 /// // byte 100 is outside the string
774 /// // s.slice(3, 100);
776 #[unstable = "use slice notation [a..b] instead"]
777 fn slice(&self, begin: uint, end: uint) -> &str {
778 core_str::StrExt::slice(self[], begin, end)
781 /// Returns a slice of the string from `begin` to its end.
783 /// Equivalent to `self.slice(begin, self.len())`.
785 /// Panics when `begin` does not point to a valid character, or is
788 /// See also `slice`, `slice_to` and `slice_chars`.
789 #[unstable = "use slice notation [a..] instead"]
790 fn slice_from(&self, begin: uint) -> &str {
791 core_str::StrExt::slice_from(self[], begin)
794 /// Returns a slice of the string from the beginning to byte
797 /// Equivalent to `self.slice(0, end)`.
799 /// Panics when `end` does not point to a valid character, or is
802 /// See also `slice`, `slice_from` and `slice_chars`.
803 #[unstable = "use slice notation [0..a] instead"]
804 fn slice_to(&self, end: uint) -> &str {
805 core_str::StrExt::slice_to(self[], end)
808 /// Returns a slice of the string from the character range
809 /// [`begin`..`end`).
811 /// That is, start at the `begin`-th code point of the string and
812 /// continue to the `end`-th code point. This does not detect or
813 /// handle edge cases such as leaving a combining character as the
814 /// first code point of the string.
816 /// Due to the design of UTF-8, this operation is `O(end)`.
817 /// See `slice`, `slice_to` and `slice_from` for `O(1)`
818 /// variants that use byte indices rather than code point
821 /// Panics if `begin` > `end` or if either `begin` or `end` is
822 /// beyond the last character of the string.
827 /// let s = "Löwe 老虎 Léopard";
828 /// assert_eq!(s.slice_chars(0, 4), "Löwe");
829 /// assert_eq!(s.slice_chars(5, 7), "老虎");
831 #[unstable = "may have yet to prove its worth"]
832 fn slice_chars(&self, begin: uint, end: uint) -> &str {
833 core_str::StrExt::slice_chars(self[], begin, end)
836 /// Takes a bytewise (not UTF-8) slice from a string.
838 /// Returns the substring from [`begin`..`end`).
840 /// Caller must check both UTF-8 character boundaries and the boundaries of
841 /// the entire slice as well.
843 unsafe fn slice_unchecked(&self, begin: uint, end: uint) -> &str {
844 core_str::StrExt::slice_unchecked(self[], begin, end)
847 /// Returns true if the pattern `pat` is a prefix of the string.
852 /// assert!("banana".starts_with("ba"));
855 fn starts_with(&self, pat: &str) -> bool {
856 core_str::StrExt::starts_with(self[], pat)
859 /// Returns true if the pattern `pat` is a suffix of the string.
864 /// assert!("banana".ends_with("nana"));
867 fn ends_with(&self, pat: &str) -> bool {
868 core_str::StrExt::ends_with(self[], pat)
871 /// Returns a string with all pre- and suffixes that match
872 /// the pattern `pat` repeatedly removed.
876 /// * pat - a string pattern
881 /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
882 /// let x: &[_] = &['1', '2'];
883 /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar");
884 /// assert_eq!("123foo1bar123".trim_matches(|&: c: char| c.is_numeric()), "foo1bar");
887 fn trim_matches<P: CharEq>(&self, pat: P) -> &str {
888 core_str::StrExt::trim_matches(self[], pat)
891 /// Returns a string with all prefixes that match
892 /// the pattern `pat` repeatedly removed.
896 /// * pat - a string pattern
901 /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
902 /// let x: &[_] = &['1', '2'];
903 /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12");
904 /// assert_eq!("123foo1bar123".trim_left_matches(|&: c: char| c.is_numeric()), "foo1bar123");
907 fn trim_left_matches<P: CharEq>(&self, pat: P) -> &str {
908 core_str::StrExt::trim_left_matches(self[], pat)
911 /// Returns a string with all suffixes that match
912 /// the pattern `pat` repeatedly removed.
916 /// * pat - a string pattern
921 /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
922 /// let x: &[_] = &['1', '2'];
923 /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar");
924 /// assert_eq!("123foo1bar123".trim_right_matches(|&: c: char| c.is_numeric()), "123foo1bar");
927 fn trim_right_matches<P: CharEq>(&self, pat: P) -> &str {
928 core_str::StrExt::trim_right_matches(self[], pat)
931 /// Check that `index`-th byte lies at the start and/or end of a
932 /// UTF-8 code point sequence.
934 /// The start and end of the string (when `index == self.len()`)
935 /// are considered to be boundaries.
937 /// Panics if `index` is greater than `self.len()`.
942 /// let s = "Löwe 老虎 Léopard";
943 /// assert!(s.is_char_boundary(0));
945 /// assert!(s.is_char_boundary(6));
946 /// assert!(s.is_char_boundary(s.len()));
948 /// // second byte of `ö`
949 /// assert!(!s.is_char_boundary(2));
951 /// // third byte of `老`
952 /// assert!(!s.is_char_boundary(8));
954 #[unstable = "naming is uncertain with container conventions"]
955 fn is_char_boundary(&self, index: uint) -> bool {
956 core_str::StrExt::is_char_boundary(self[], index)
959 /// Pluck a character out of a string and return the index of the next
962 /// This function can be used to iterate over the Unicode characters of a
967 /// This example manually iterates through the characters of a
968 /// string; this should normally be done by `.chars()` or
972 /// use std::str::CharRange;
974 /// let s = "中华Việt Nam";
976 /// while i < s.len() {
977 /// let CharRange {ch, next} = s.char_range_at(i);
978 /// println!("{}: {}", i, ch);
1000 /// * s - The string
1001 /// * i - The byte offset of the char to extract
1005 /// A record {ch: char, next: uint} containing the char value and the byte
1006 /// index of the next Unicode character.
1010 /// If `i` is greater than or equal to the length of the string.
1011 /// If `i` is not the index of the beginning of a valid UTF-8 character.
1012 #[unstable = "naming is uncertain with container conventions"]
1013 fn char_range_at(&self, start: uint) -> CharRange {
1014 core_str::StrExt::char_range_at(self[], start)
1017 /// Given a byte position and a str, return the previous char and its position.
1019 /// This function can be used to iterate over a Unicode string in reverse.
1021 /// Returns 0 for next index if called on start index 0.
1025 /// If `i` is greater than the length of the string.
1026 /// If `i` is not an index following a valid UTF-8 character.
1027 #[unstable = "naming is uncertain with container conventions"]
1028 fn char_range_at_reverse(&self, start: uint) -> CharRange {
1029 core_str::StrExt::char_range_at_reverse(self[], start)
1032 /// Plucks the character starting at the `i`th byte of a string.
1038 /// assert_eq!(s.char_at(1), 'b');
1039 /// assert_eq!(s.char_at(2), 'π');
1040 /// assert_eq!(s.char_at(4), 'c');
1045 /// If `i` is greater than or equal to the length of the string.
1046 /// If `i` is not the index of the beginning of a valid UTF-8 character.
1047 #[unstable = "naming is uncertain with container conventions"]
1048 fn char_at(&self, i: uint) -> char {
1049 core_str::StrExt::char_at(self[], i)
1052 /// Plucks the character ending at the `i`th byte of a string.
1056 /// If `i` is greater than the length of the string.
1057 /// If `i` is not an index following a valid UTF-8 character.
1058 #[unstable = "naming is uncertain with container conventions"]
1059 fn char_at_reverse(&self, i: uint) -> char {
1060 core_str::StrExt::char_at_reverse(self[], i)
1063 /// Work with the byte buffer of a string as a byte slice.
1068 /// assert_eq!("bors".as_bytes(), b"bors");
1071 fn as_bytes(&self) -> &[u8] {
1072 core_str::StrExt::as_bytes(self[])
1075 /// Returns the byte index of the first character of `self` that
1076 /// matches the pattern `pat`.
1080 /// `Some` containing the byte index of the first matching character
1081 /// or `None` if there is no match
1086 /// let s = "Löwe 老虎 Léopard";
1088 /// assert_eq!(s.find('L'), Some(0));
1089 /// assert_eq!(s.find('é'), Some(14));
1091 /// // the first space
1092 /// assert_eq!(s.find(|&: c: char| c.is_whitespace()), Some(5));
1094 /// // neither are found
1095 /// let x: &[_] = &['1', '2'];
1096 /// assert_eq!(s.find(x), None);
1099 fn find<P: CharEq>(&self, pat: P) -> Option<uint> {
1100 core_str::StrExt::find(self[], pat)
1103 /// Returns the byte index of the last character of `self` that
1104 /// matches the pattern `pat`.
1108 /// `Some` containing the byte index of the last matching character
1109 /// or `None` if there is no match.
1114 /// let s = "Löwe 老虎 Léopard";
1116 /// assert_eq!(s.rfind('L'), Some(13));
1117 /// assert_eq!(s.rfind('é'), Some(14));
1119 /// // the second space
1120 /// assert_eq!(s.rfind(|&: c: char| c.is_whitespace()), Some(12));
1122 /// // searches for an occurrence of either `1` or `2`, but neither are found
1123 /// let x: &[_] = &['1', '2'];
1124 /// assert_eq!(s.rfind(x), None);
1127 fn rfind<P: CharEq>(&self, pat: P) -> Option<uint> {
1128 core_str::StrExt::rfind(self[], pat)
1131 /// Returns the byte index of the first matching substring
1135 /// * `needle` - The string to search for
1139 /// `Some` containing the byte index of the first matching substring
1140 /// or `None` if there is no match.
1145 /// let s = "Löwe 老虎 Léopard";
1147 /// assert_eq!(s.find_str("老虎 L"), Some(6));
1148 /// assert_eq!(s.find_str("muffin man"), None);
1150 #[unstable = "might get removed in favor of a more generic find in the future"]
1151 fn find_str(&self, needle: &str) -> Option<uint> {
1152 core_str::StrExt::find_str(self[], needle)
1155 /// Retrieves the first character from a string slice and returns
1156 /// it. This does not allocate a new string; instead, it returns a
1157 /// slice that points one character beyond the character that was
1158 /// shifted. If the string does not contain any characters,
1159 /// None is returned instead.
1164 /// let s = "Löwe 老虎 Léopard";
1165 /// let (c, s1) = s.slice_shift_char().unwrap();
1166 /// assert_eq!(c, 'L');
1167 /// assert_eq!(s1, "öwe 老虎 Léopard");
1169 /// let (c, s2) = s1.slice_shift_char().unwrap();
1170 /// assert_eq!(c, 'ö');
1171 /// assert_eq!(s2, "we 老虎 Léopard");
1173 #[unstable = "awaiting conventions about shifting and slices"]
1174 fn slice_shift_char(&self) -> Option<(char, &str)> {
1175 core_str::StrExt::slice_shift_char(self[])
1178 /// Returns the byte offset of an inner slice relative to an enclosing outer slice.
1180 /// Panics if `inner` is not a direct slice contained within self.
1185 /// let string = "a\nb\nc";
1186 /// let lines: Vec<&str> = string.lines().collect();
1188 /// assert!(string.subslice_offset(lines[0]) == 0); // &"a"
1189 /// assert!(string.subslice_offset(lines[1]) == 2); // &"b"
1190 /// assert!(string.subslice_offset(lines[2]) == 4); // &"c"
1192 #[unstable = "awaiting convention about comparability of arbitrary slices"]
1193 fn subslice_offset(&self, inner: &str) -> uint {
1194 core_str::StrExt::subslice_offset(self[], inner)
1197 /// Return an unsafe pointer to the string's buffer.
1199 /// The caller must ensure that the string outlives this pointer,
1200 /// and that it is not reallocated (e.g. by pushing to the
1204 fn as_ptr(&self) -> *const u8 {
1205 core_str::StrExt::as_ptr(self[])
1208 /// Return an iterator of `u16` over the string encoded as UTF-16.
1209 #[unstable = "this functionality may only be provided by libunicode"]
1210 fn utf16_units(&self) -> Utf16Units {
1211 Utf16Units { encoder: Utf16Encoder::new(self[].chars()) }
1214 /// Return the number of bytes in this string
1219 /// assert_eq!("foo".len(), 3);
1220 /// assert_eq!("ƒoo".len(), 4);
1224 fn len(&self) -> uint {
1225 core_str::StrExt::len(self[])
1228 /// Returns true if this slice contains no bytes
1233 /// assert!("".is_empty());
1237 fn is_empty(&self) -> bool {
1238 core_str::StrExt::is_empty(self[])
1241 /// Parse this string into the specified type.
1246 /// assert_eq!("4".parse::<u32>(), Some(4));
1247 /// assert_eq!("j".parse::<u32>(), None);
1250 #[unstable = "this method was just created"]
1251 fn parse<F: FromStr>(&self) -> Option<F> {
1252 core_str::StrExt::parse(self[])
1255 /// Returns an iterator over the
1256 /// [grapheme clusters](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
1259 /// If `is_extended` is true, the iterator is over the *extended grapheme clusters*;
1260 /// otherwise, the iterator is over the *legacy grapheme clusters*.
1261 /// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
1262 /// recommends extended grapheme cluster boundaries for general processing.
1267 /// let gr1 = "a\u{310}e\u{301}o\u{308}\u{332}".graphemes(true).collect::<Vec<&str>>();
1268 /// let b: &[_] = &["a\u{310}", "e\u{301}", "o\u{308}\u{332}"];
1269 /// assert_eq!(gr1.as_slice(), b);
1270 /// let gr2 = "a\r\nb🇷🇺🇸🇹".graphemes(true).collect::<Vec<&str>>();
1271 /// let b: &[_] = &["a", "\r\n", "b", "🇷🇺🇸🇹"];
1272 /// assert_eq!(gr2.as_slice(), b);
1274 #[unstable = "this functionality may only be provided by libunicode"]
1275 fn graphemes(&self, is_extended: bool) -> Graphemes {
1276 UnicodeStr::graphemes(self[], is_extended)
1279 /// Returns an iterator over the grapheme clusters of self and their byte offsets.
1280 /// See `graphemes()` method for more information.
1285 /// let gr_inds = "a̐éö̲\r\n".grapheme_indices(true).collect::<Vec<(uint, &str)>>();
1286 /// let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
1287 /// assert_eq!(gr_inds.as_slice(), b);
1289 #[unstable = "this functionality may only be provided by libunicode"]
1290 fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices {
1291 UnicodeStr::grapheme_indices(self[], is_extended)
1294 /// An iterator over the words of a string (subsequences separated
1295 /// by any sequence of whitespace). Sequences of whitespace are
1296 /// collapsed, so empty "words" are not included.
1301 /// let some_words = " Mary had\ta little \n\t lamb";
1302 /// let v: Vec<&str> = some_words.words().collect();
1303 /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
1306 fn words(&self) -> Words {
1307 UnicodeStr::words(self[])
1310 /// Returns a string's displayed width in columns, treating control
1311 /// characters as zero-width.
1313 /// `is_cjk` determines behavior for characters in the Ambiguous category:
1314 /// if `is_cjk` is `true`, these are 2 columns wide; otherwise, they are 1.
1315 /// In CJK locales, `is_cjk` should be `true`, else it should be `false`.
1316 /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
1317 /// recommends that these characters be treated as 1 column (i.e.,
1318 /// `is_cjk` = `false`) if the locale is unknown.
1319 #[unstable = "this functionality may only be provided by libunicode"]
1320 fn width(&self, is_cjk: bool) -> uint {
1321 UnicodeStr::width(self[], is_cjk)
1324 /// Returns a string with leading and trailing whitespace removed.
1326 fn trim(&self) -> &str {
1327 UnicodeStr::trim(self[])
1330 /// Returns a string with leading whitespace removed.
1332 fn trim_left(&self) -> &str {
1333 UnicodeStr::trim_left(self[])
1336 /// Returns a string with trailing whitespace removed.
1338 fn trim_right(&self) -> &str {
1339 UnicodeStr::trim_right(self[])
1343 impl StrExt for str {}
1349 use core::iter::AdditiveIterator;
1350 use super::from_utf8;
1351 use super::Utf8Error;
1356 assert!("" <= "foo");
1357 assert!("foo" <= "foo");
1358 assert!("foo" != "bar");
1363 assert_eq!("".len(), 0u);
1364 assert_eq!("hello world".len(), 11u);
1365 assert_eq!("\x63".len(), 1u);
1366 assert_eq!("\u{a2}".len(), 2u);
1367 assert_eq!("\u{3c0}".len(), 2u);
1368 assert_eq!("\u{2620}".len(), 3u);
1369 assert_eq!("\u{1d11e}".len(), 4u);
1371 assert_eq!("".chars().count(), 0u);
1372 assert_eq!("hello world".chars().count(), 11u);
1373 assert_eq!("\x63".chars().count(), 1u);
1374 assert_eq!("\u{a2}".chars().count(), 1u);
1375 assert_eq!("\u{3c0}".chars().count(), 1u);
1376 assert_eq!("\u{2620}".chars().count(), 1u);
1377 assert_eq!("\u{1d11e}".chars().count(), 1u);
1378 assert_eq!("ประเทศไทย中华Việt Nam".chars().count(), 19u);
1380 assert_eq!("hello".width(false), 10u);
1381 assert_eq!("hello".width(true), 10u);
1382 assert_eq!("\0\0\0\0\0".width(false), 0u);
1383 assert_eq!("\0\0\0\0\0".width(true), 0u);
1384 assert_eq!("".width(false), 0u);
1385 assert_eq!("".width(true), 0u);
1386 assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(false), 4u);
1387 assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(true), 8u);
1392 assert_eq!("hello".find('l'), Some(2u));
1393 assert_eq!("hello".find(|&: c:char| c == 'o'), Some(4u));
1394 assert!("hello".find('x').is_none());
1395 assert!("hello".find(|&: c:char| c == 'x').is_none());
1396 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
1397 assert_eq!("ประเทศไทย中华Việt Nam".find(|&: c: char| c == '华'), Some(30u));
1402 assert_eq!("hello".rfind('l'), Some(3u));
1403 assert_eq!("hello".rfind(|&: c:char| c == 'o'), Some(4u));
1404 assert!("hello".rfind('x').is_none());
1405 assert!("hello".rfind(|&: c:char| c == 'x').is_none());
1406 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
1407 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|&: c: char| c == '华'), Some(30u));
1412 let empty = String::from_str("");
1413 let s: String = empty.chars().collect();
1414 assert_eq!(empty, s);
1415 let data = String::from_str("ประเทศไทย中");
1416 let s: String = data.chars().collect();
1417 assert_eq!(data, s);
1421 fn test_into_bytes() {
1422 let data = String::from_str("asdf");
1423 let buf = data.into_bytes();
1424 assert_eq!(b"asdf", buf);
1428 fn test_find_str() {
1430 assert_eq!("".find_str(""), Some(0u));
1431 assert!("banana".find_str("apple pie").is_none());
1433 let data = "abcabc";
1434 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
1435 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
1436 assert!(data.slice(2u, 4u).find_str("ab").is_none());
1438 let string = "ประเทศไทย中华Việt Nam";
1439 let mut data = String::from_str(string);
1440 data.push_str(string);
1441 assert!(data.find_str("ไท华").is_none());
1442 assert_eq!(data.slice(0u, 43u).find_str(""), Some(0u));
1443 assert_eq!(data.slice(6u, 43u).find_str(""), Some(6u - 6u));
1445 assert_eq!(data.slice(0u, 43u).find_str("ประ"), Some( 0u));
1446 assert_eq!(data.slice(0u, 43u).find_str("ทศไ"), Some(12u));
1447 assert_eq!(data.slice(0u, 43u).find_str("ย中"), Some(24u));
1448 assert_eq!(data.slice(0u, 43u).find_str("iệt"), Some(34u));
1449 assert_eq!(data.slice(0u, 43u).find_str("Nam"), Some(40u));
1451 assert_eq!(data.slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
1452 assert_eq!(data.slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
1453 assert_eq!(data.slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
1454 assert_eq!(data.slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
1455 assert_eq!(data.slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
1459 fn test_slice_chars() {
1460 fn t(a: &str, b: &str, start: uint) {
1461 assert_eq!(a.slice_chars(start, start + b.chars().count()), b);
1464 t("hello", "llo", 2);
1465 t("hello", "el", 1);
1468 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
1471 fn s(x: &str) -> String { x.to_string() }
1473 macro_rules! test_concat {
1474 ($expected: expr, $string: expr) => {
1476 let s: String = $string.concat();
1477 assert_eq!($expected, s);
1483 fn test_concat_for_different_types() {
1484 test_concat!("ab", vec![s("a"), s("b")]);
1485 test_concat!("ab", vec!["a", "b"]);
1486 test_concat!("ab", vec!["a", "b"].as_slice());
1487 test_concat!("ab", vec![s("a"), s("b")]);
1491 fn test_concat_for_different_lengths() {
1492 let empty: &[&str] = &[];
1493 test_concat!("", empty);
1494 test_concat!("a", ["a"]);
1495 test_concat!("ab", ["a", "b"]);
1496 test_concat!("abc", ["", "a", "bc"]);
1499 macro_rules! test_connect {
1500 ($expected: expr, $string: expr, $delim: expr) => {
1502 let s = $string.connect($delim);
1503 assert_eq!($expected, s);
1509 fn test_connect_for_different_types() {
1510 test_connect!("a-b", ["a", "b"], "-");
1511 let hyphen = "-".to_string();
1512 test_connect!("a-b", [s("a"), s("b")], hyphen.as_slice());
1513 test_connect!("a-b", vec!["a", "b"], hyphen.as_slice());
1514 test_connect!("a-b", vec!["a", "b"].as_slice(), "-");
1515 test_connect!("a-b", vec![s("a"), s("b")], "-");
1519 fn test_connect_for_different_lengths() {
1520 let empty: &[&str] = &[];
1521 test_connect!("", empty, "-");
1522 test_connect!("a", ["a"], "-");
1523 test_connect!("a-b", ["a", "b"], "-");
1524 test_connect!("-a-bc", ["", "a", "bc"], "-");
1528 fn test_unsafe_slice() {
1529 assert_eq!("ab", unsafe {"abc".slice_unchecked(0, 2)});
1530 assert_eq!("bc", unsafe {"abc".slice_unchecked(1, 3)});
1531 assert_eq!("", unsafe {"abc".slice_unchecked(1, 1)});
1532 fn a_million_letter_a() -> String {
1534 let mut rs = String::new();
1536 rs.push_str("aaaaaaaaaa");
1541 fn half_a_million_letter_a() -> String {
1543 let mut rs = String::new();
1545 rs.push_str("aaaaa");
1550 let letters = a_million_letter_a();
1551 assert!(half_a_million_letter_a() ==
1552 unsafe {String::from_str(letters.slice_unchecked(
// Table-driven check of `starts_with`, including empty operands and a
// multi-byte leading character.
fn test_starts_with() {
    // (haystack, prefix, expected result)
    let cases = [
        ("", "", true),
        ("abc", "", true),
        ("abc", "a", true),
        ("a", "abc", false),
        ("", "abc", false),
        ("ödd", "-", false),
        ("ödd", "öd", true),
    ];
    for &(haystack, prefix, expected) in cases.iter() {
        assert!(haystack.starts_with(prefix) == expected,
                "`{}`.starts_with(`{}`) should be {}", haystack, prefix, expected);
    }
}
// Table-driven check of `ends_with`, including empty operands and a
// multi-byte trailing character.
fn test_ends_with() {
    // (haystack, suffix, expected result)
    let cases = [
        ("", "", true),
        ("abc", "", true),
        ("abc", "c", true),
        ("a", "abc", false),
        ("", "abc", false),
        ("ddö", "-", false),
        ("ddö", "dö", true),
    ];
    for &(haystack, suffix, expected) in cases.iter() {
        assert!(haystack.ends_with(suffix) == expected,
                "`{}`.ends_with(`{}`) should be {}", haystack, suffix, expected);
    }
}
1580 fn test_is_empty() {
1581 assert!("".is_empty());
1582 assert!(!"a".is_empty());
1588 assert_eq!("".replace(a, "b"), String::from_str(""));
1589 assert_eq!("a".replace(a, "b"), String::from_str("b"));
1590 assert_eq!("ab".replace(a, "b"), String::from_str("bb"));
1592 assert!(" test test ".replace(test, "toast") ==
1593 String::from_str(" toast toast "));
1594 assert_eq!(" test test ".replace(test, ""), String::from_str(" "));
1598 fn test_replace_2a() {
1599 let data = "ประเทศไทย中华";
1600 let repl = "دولة الكويت";
1603 let a2 = "دولة الكويتทศไทย中华";
1604 assert_eq!(data.replace(a, repl), a2);
1608 fn test_replace_2b() {
1609 let data = "ประเทศไทย中华";
1610 let repl = "دولة الكويت";
1613 let b2 = "ปรدولة الكويتทศไทย中华";
1614 assert_eq!(data.replace(b, repl), b2);
1618 fn test_replace_2c() {
1619 let data = "ประเทศไทย中华";
1620 let repl = "دولة الكويت";
1623 let c2 = "ประเทศไทยدولة الكويت";
1624 assert_eq!(data.replace(c, repl), c2);
1628 fn test_replace_2d() {
1629 let data = "ประเทศไทย中华";
1630 let repl = "دولة الكويت";
1633 assert_eq!(data.replace(d, repl), data);
1638 assert_eq!("ab", "abc".slice(0, 2));
1639 assert_eq!("bc", "abc".slice(1, 3));
1640 assert_eq!("", "abc".slice(1, 1));
1641 assert_eq!("\u{65e5}", "\u{65e5}\u{672c}".slice(0, 3));
1643 let data = "ประเทศไทย中华";
1644 assert_eq!("ป", data.slice(0, 3));
1645 assert_eq!("ร", data.slice(3, 6));
1646 assert_eq!("", data.slice(3, 3));
1647 assert_eq!("华", data.slice(30, 33));
1649 fn a_million_letter_x() -> String {
1651 let mut rs = String::new();
1653 rs.push_str("华华华华华华华华华华");
1658 fn half_a_million_letter_x() -> String {
1660 let mut rs = String::new();
1662 rs.push_str("华华华华华");
1667 let letters = a_million_letter_x();
1668 assert!(half_a_million_letter_x() ==
1669 String::from_str(letters.slice(0u, 3u * 500000u)));
1674 let ss = "中华Việt Nam";
1676 assert_eq!("华", ss.slice(3u, 6u));
1677 assert_eq!("Việt Nam", ss.slice(6u, 16u));
1679 assert_eq!("ab", "abc".slice(0u, 2u));
1680 assert_eq!("bc", "abc".slice(1u, 3u));
1681 assert_eq!("", "abc".slice(1u, 1u));
1683 assert_eq!("中", ss.slice(0u, 3u));
1684 assert_eq!("华V", ss.slice(3u, 7u));
1685 assert_eq!("", ss.slice(3u, 3u));
1700 fn test_slice_fail() {
1701 "中华Việt Nam".slice(0u, 2u);
1705 fn test_slice_from() {
1706 assert_eq!("abcd".slice_from(0), "abcd");
1707 assert_eq!("abcd".slice_from(2), "cd");
1708 assert_eq!("abcd".slice_from(4), "");
1711 fn test_slice_to() {
1712 assert_eq!("abcd".slice_to(0), "");
1713 assert_eq!("abcd".slice_to(2), "ab");
1714 assert_eq!("abcd".slice_to(4), "abcd");
1718 fn test_trim_left_matches() {
1719 let v: &[char] = &[];
1720 assert_eq!(" *** foo *** ".trim_left_matches(v), " *** foo *** ");
1721 let chars: &[char] = &['*', ' '];
1722 assert_eq!(" *** foo *** ".trim_left_matches(chars), "foo *** ");
1723 assert_eq!(" *** *** ".trim_left_matches(chars), "");
1724 assert_eq!("foo *** ".trim_left_matches(chars), "foo *** ");
1726 assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
1727 let chars: &[char] = &['1', '2'];
1728 assert_eq!("12foo1bar12".trim_left_matches(chars), "foo1bar12");
1729 assert_eq!("123foo1bar123".trim_left_matches(|&: c: char| c.is_numeric()), "foo1bar123");
1733 fn test_trim_right_matches() {
1734 let v: &[char] = &[];
1735 assert_eq!(" *** foo *** ".trim_right_matches(v), " *** foo *** ");
1736 let chars: &[char] = &['*', ' '];
1737 assert_eq!(" *** foo *** ".trim_right_matches(chars), " *** foo");
1738 assert_eq!(" *** *** ".trim_right_matches(chars), "");
1739 assert_eq!(" *** foo".trim_right_matches(chars), " *** foo");
1741 assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
1742 let chars: &[char] = &['1', '2'];
1743 assert_eq!("12foo1bar12".trim_right_matches(chars), "12foo1bar");
1744 assert_eq!("123foo1bar123".trim_right_matches(|&: c: char| c.is_numeric()), "123foo1bar");
1748 fn test_trim_matches() {
1749 let v: &[char] = &[];
1750 assert_eq!(" *** foo *** ".trim_matches(v), " *** foo *** ");
1751 let chars: &[char] = &['*', ' '];
1752 assert_eq!(" *** foo *** ".trim_matches(chars), "foo");
1753 assert_eq!(" *** *** ".trim_matches(chars), "");
1754 assert_eq!("foo".trim_matches(chars), "foo");
1756 assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
1757 let chars: &[char] = &['1', '2'];
1758 assert_eq!("12foo1bar12".trim_matches(chars), "foo1bar");
1759 assert_eq!("123foo1bar123".trim_matches(|&: c: char| c.is_numeric()), "foo1bar");
// `trim_left` strips leading whitespace only; trailing whitespace survives.
fn test_trim_left() {
    // (input, expected result)
    let cases = [
        ("", ""),
        ("a", "a"),
        (" ", ""),
        (" blah", "blah"),
        (" \u{3000} wut", "wut"),
        ("hey ", "hey "),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(input.trim_left(), expected);
    }
}
// `trim_right` strips trailing whitespace only; leading whitespace survives.
fn test_trim_right() {
    // (input, expected result)
    let cases = [
        ("", ""),
        ("a", "a"),
        (" ", ""),
        ("blah ", "blah"),
        ("wut \u{3000} ", "wut"),
        (" hey", " hey"),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(input.trim_right(), expected);
    }
}
1784 assert_eq!("".trim(), "");
1785 assert_eq!("a".trim(), "a");
1786 assert_eq!(" ".trim(), "");
1787 assert_eq!(" blah ".trim(), "blah");
1788 assert_eq!("\nwut \u{3000} ".trim(), "wut");
1789 assert_eq!(" hey dude ".trim(), "hey dude");
// Checks `char::is_whitespace` over whole strings: every char of each blank
// sample must be whitespace, and a sample containing `_` must not qualify.
fn test_is_whitespace() {
    // True when every char of `text` is whitespace (trivially so for "").
    fn only_whitespace(text: &str) -> bool {
        text.chars().all(|ch| ch.is_whitespace())
    }

    // "\u{2009}" is the thin space.
    let blank = ["", " ", "\u{2009}", " \n\t "];
    for text in blank.iter() {
        assert!(only_whitespace(*text));
    }
    assert!(!only_whitespace(" _ "));
}
1802 fn test_slice_shift_char() {
1803 let data = "ประเทศไทย中";
1804 assert_eq!(data.slice_shift_char(), Some(('ป', "ระเทศไทย中")));
1808 fn test_slice_shift_char_2() {
1810 assert_eq!(empty.slice_shift_char(), None);
1815 // deny overlong encodings
1816 assert!(from_utf8(&[0xc0, 0x80]).is_err());
1817 assert!(from_utf8(&[0xc0, 0xae]).is_err());
1818 assert!(from_utf8(&[0xe0, 0x80, 0x80]).is_err());
1819 assert!(from_utf8(&[0xe0, 0x80, 0xaf]).is_err());
1820 assert!(from_utf8(&[0xe0, 0x81, 0x81]).is_err());
1821 assert!(from_utf8(&[0xf0, 0x82, 0x82, 0xac]).is_err());
1822 assert!(from_utf8(&[0xf4, 0x90, 0x80, 0x80]).is_err());
1825 assert!(from_utf8(&[0xED, 0xA0, 0x80]).is_err());
1826 assert!(from_utf8(&[0xED, 0xBF, 0xBF]).is_err());
1828 assert!(from_utf8(&[0xC2, 0x80]).is_ok());
1829 assert!(from_utf8(&[0xDF, 0xBF]).is_ok());
1830 assert!(from_utf8(&[0xE0, 0xA0, 0x80]).is_ok());
1831 assert!(from_utf8(&[0xED, 0x9F, 0xBF]).is_ok());
1832 assert!(from_utf8(&[0xEE, 0x80, 0x80]).is_ok());
1833 assert!(from_utf8(&[0xEF, 0xBF, 0xBF]).is_ok());
1834 assert!(from_utf8(&[0xF0, 0x90, 0x80, 0x80]).is_ok());
1835 assert!(from_utf8(&[0xF4, 0x8F, 0xBF, 0xBF]).is_ok());
1839 fn test_is_utf16() {
1840 use unicode::str::is_utf16;
1842 ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } }
1851 // surrogate pairs (randomly generated with Python 3's
1852 // .encode('utf-16be'))
1853 pos!(&[0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
1854 &[0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
1855 &[0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);
1857 // mixtures (also random)
1858 pos!(&[0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
1859 &[0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
1860 &[0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);
1864 ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } }
1868 // surrogate + regular unit
1870 // surrogate + lead surrogate
1872 // unterminated surrogate
1874 // trail surrogate without a lead
1877 // random byte sequences that Python 3's .decode('utf-16be')
1879 neg!(&[0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
1880 &[0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
1881 &[0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
1882 &[0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
1883 &[0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
1884 &[0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
1885 &[0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
1886 &[0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
1887 &[0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
1888 &[0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
1889 &[0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
1890 &[0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
1891 &[0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
1892 &[0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
1893 &[0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
1894 &[0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
1895 &[0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
1896 &[0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
1897 &[0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
1898 &[0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
1899 &[0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
1903 fn test_as_bytes() {
1906 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1907 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1911 assert_eq!("".as_bytes(), b);
1912 assert_eq!("abc".as_bytes(), b"abc");
1913 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
1918 fn test_as_bytes_fail() {
1919 // Don't double free. (I'm not sure if this exercises the
1920 // original problem code path anymore.)
1921 let s = String::from_str("");
1922 let _bytes = s.as_bytes();
1928 let buf = "hello".as_ptr();
1930 assert_eq!(*buf.offset(0), b'h');
1931 assert_eq!(*buf.offset(1), b'e');
1932 assert_eq!(*buf.offset(2), b'l');
1933 assert_eq!(*buf.offset(3), b'l');
1934 assert_eq!(*buf.offset(4), b'o');
1939 fn test_subslice_offset() {
1940 let a = "kernelsprite";
1941 let b = a.slice(7, a.len());
1942 let c = a.slice(0, a.len() - 6);
1943 assert_eq!(a.subslice_offset(b), 7);
1944 assert_eq!(a.subslice_offset(c), 0);
1946 let string = "a\nb\nc";
1947 let lines: Vec<&str> = string.lines().collect();
1948 assert_eq!(string.subslice_offset(lines[0]), 0);
1949 assert_eq!(string.subslice_offset(lines[1]), 2);
1950 assert_eq!(string.subslice_offset(lines[2]), 4);
1955 fn test_subslice_offset_2() {
1956 let a = "alchemiter";
1957 let b = "cruxtruder";
1958 a.subslice_offset(b);
1962 fn vec_str_conversions() {
1963 let s1: String = String::from_str("All mimsy were the borogoves");
1965 let v: Vec<u8> = s1.as_bytes().to_vec();
1966 let s2: String = String::from_str(from_utf8(v.as_slice()).unwrap());
1967 let mut i: uint = 0u;
1968 let n1: uint = s1.len();
1969 let n2: uint = v.len();
1972 let a: u8 = s1.as_bytes()[i];
1973 let b: u8 = s2.as_bytes()[i];
1982 fn test_contains() {
1983 assert!("abcde".contains("bcd"));
1984 assert!("abcde".contains("abcd"));
1985 assert!("abcde".contains("bcde"));
1986 assert!("abcde".contains(""));
1987 assert!("".contains(""));
1988 assert!(!"abcde".contains("def"));
1989 assert!(!"".contains("a"));
1991 let data = "ประเทศไทย中华Việt Nam";
1992 assert!(data.contains("ประเ"));
1993 assert!(data.contains("ะเ"));
1994 assert!(data.contains("中华"));
1995 assert!(!data.contains("ไท华"));
1999 fn test_contains_char() {
2000 assert!("abc".contains_char('b'));
2001 assert!("a".contains_char('a'));
2002 assert!(!"abc".contains_char('d'));
2003 assert!(!"".contains_char('a'));
2008 let s = "ศไทย中华Việt Nam";
2009 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2011 for ch in v.iter() {
2012 assert!(s.char_at(pos) == *ch);
2013 pos += ch.to_string().len();
2018 fn test_char_at_reverse() {
2019 let s = "ศไทย中华Việt Nam";
2020 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2021 let mut pos = s.len();
2022 for ch in v.iter().rev() {
2023 assert!(s.char_at_reverse(pos) == *ch);
2024 pos -= ch.to_string().len();
2029 fn test_escape_unicode() {
2030 assert_eq!("abc".escape_unicode(),
2031 String::from_str("\\u{61}\\u{62}\\u{63}"));
2032 assert_eq!("a c".escape_unicode(),
2033 String::from_str("\\u{61}\\u{20}\\u{63}"));
2034 assert_eq!("\r\n\t".escape_unicode(),
2035 String::from_str("\\u{d}\\u{a}\\u{9}"));
2036 assert_eq!("'\"\\".escape_unicode(),
2037 String::from_str("\\u{27}\\u{22}\\u{5c}"));
2038 assert_eq!("\x00\x01\u{fe}\u{ff}".escape_unicode(),
2039 String::from_str("\\u{0}\\u{1}\\u{fe}\\u{ff}"));
2040 assert_eq!("\u{100}\u{ffff}".escape_unicode(),
2041 String::from_str("\\u{100}\\u{ffff}"));
2042 assert_eq!("\u{10000}\u{10ffff}".escape_unicode(),
2043 String::from_str("\\u{10000}\\u{10ffff}"));
2044 assert_eq!("ab\u{fb00}".escape_unicode(),
2045 String::from_str("\\u{61}\\u{62}\\u{fb00}"));
2046 assert_eq!("\u{1d4ea}\r".escape_unicode(),
2047 String::from_str("\\u{1d4ea}\\u{d}"));
2051 fn test_escape_default() {
2052 assert_eq!("abc".escape_default(), String::from_str("abc"));
2053 assert_eq!("a c".escape_default(), String::from_str("a c"));
2054 assert_eq!("\r\n\t".escape_default(), String::from_str("\\r\\n\\t"));
2055 assert_eq!("'\"\\".escape_default(), String::from_str("\\'\\\"\\\\"));
2056 assert_eq!("\u{100}\u{ffff}".escape_default(),
2057 String::from_str("\\u{100}\\u{ffff}"));
2058 assert_eq!("\u{10000}\u{10ffff}".escape_default(),
2059 String::from_str("\\u{10000}\\u{10ffff}"));
2060 assert_eq!("ab\u{fb00}".escape_default(),
2061 String::from_str("ab\\u{fb00}"));
2062 assert_eq!("\u{1d4ea}\r".escape_default(),
2063 String::from_str("\\u{1d4ea}\\r"));
// Checks the total ordering (`cmp`) of string slices: a longer string sorts
// after its own prefix, equal strings compare `Equal`, and the first
// differing byte decides otherwise.
fn test_total_ord() {
    // Every comparison has to be asserted. A bare `a.cmp(b) == X;` statement
    // only computes a bool and throws it away, so it could never fail.
    assert_eq!("1234".cmp("123"), Greater);
    assert_eq!("123".cmp("1234"), Less);
    assert_eq!("1234".cmp("1234"), Equal);
    assert_eq!("12345555".cmp("123456"), Less);
    assert_eq!("22".cmp("1234"), Greater);
}
2076 fn test_char_range_at() {
2077 let data = "b¢€𤭢𤭢€¢b";
2078 assert_eq!('b', data.char_range_at(0).ch);
2079 assert_eq!('¢', data.char_range_at(1).ch);
2080 assert_eq!('€', data.char_range_at(3).ch);
2081 assert_eq!('𤭢', data.char_range_at(6).ch);
2082 assert_eq!('𤭢', data.char_range_at(10).ch);
2083 assert_eq!('€', data.char_range_at(14).ch);
2084 assert_eq!('¢', data.char_range_at(17).ch);
2085 assert_eq!('b', data.char_range_at(19).ch);
2089 fn test_char_range_at_reverse_underflow() {
2090 assert_eq!("abc".char_range_at_reverse(0).next, 0);
2094 fn test_iterator() {
2095 let s = "ศไทย中华Việt Nam";
2096 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2099 let mut it = s.chars();
2102 assert_eq!(c, v[pos]);
2105 assert_eq!(pos, v.len());
2109 fn test_rev_iterator() {
2110 let s = "ศไทย中华Việt Nam";
2111 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2114 let mut it = s.chars().rev();
2117 assert_eq!(c, v[pos]);
2120 assert_eq!(pos, v.len());
2124 fn test_chars_decoding() {
2125 let mut bytes = [0u8; 4];
2126 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2127 let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2128 let s = ::core::str::from_utf8(bytes[..len]).unwrap();
2129 if Some(c) != s.chars().next() {
2130 panic!("character {:x}={} does not decode correctly", c as u32, c);
2136 fn test_chars_rev_decoding() {
2137 let mut bytes = [0u8; 4];
2138 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2139 let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2140 let s = ::core::str::from_utf8(bytes[..len]).unwrap();
2141 if Some(c) != s.chars().rev().next() {
2142 panic!("character {:x}={} does not decode correctly", c as u32, c);
2148 fn test_iterator_clone() {
2149 let s = "ศไทย中华Việt Nam";
2150 let mut it = s.chars();
2152 assert!(it.zip(it.clone()).all(|(x,y)| x == y));
2156 fn test_bytesator() {
2157 let s = "ศไทย中华Việt Nam";
2159 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2160 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2165 for b in s.bytes() {
2166 assert_eq!(b, v[pos]);
2172 fn test_bytes_revator() {
2173 let s = "ศไทย中华Việt Nam";
2175 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2176 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2179 let mut pos = v.len();
2181 for b in s.bytes().rev() {
2183 assert_eq!(b, v[pos]);
2188 fn test_char_indicesator() {
2189 let s = "ศไทย中华Việt Nam";
2190 let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
2191 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2194 let mut it = s.char_indices();
2197 assert_eq!(c, (p[pos], v[pos]));
2200 assert_eq!(pos, v.len());
2201 assert_eq!(pos, p.len());
2205 fn test_char_indices_revator() {
2206 let s = "ศไทย中华Việt Nam";
2207 let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
2208 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2211 let mut it = s.char_indices().rev();
2214 assert_eq!(c, (p[pos], v[pos]));
2217 assert_eq!(pos, v.len());
2218 assert_eq!(pos, p.len());
2222 fn test_splitn_char_iterator() {
2223 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2225 let split: Vec<&str> = data.splitn(3, ' ').collect();
2226 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2228 let split: Vec<&str> = data.splitn(3, |&: c: char| c == ' ').collect();
2229 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2232 let split: Vec<&str> = data.splitn(3, 'ä').collect();
2233 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
2235 let split: Vec<&str> = data.splitn(3, |&: c: char| c == 'ä').collect();
2236 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
2240 fn test_split_char_iterator_no_trailing() {
2241 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2243 let split: Vec<&str> = data.split('\n').collect();
2244 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
2246 let split: Vec<&str> = data.split_terminator('\n').collect();
2247 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
2252 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
2253 let words: Vec<&str> = data.words().collect();
2254 assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
2258 fn test_nfd_chars() {
2260 ($input: expr, $expected: expr) => {
2261 assert_eq!($input.nfd_chars().collect::<String>(), $expected);
2265 t!("\u{1e0b}\u{1c4}", "d\u{307}\u{1c4}");
2266 t!("\u{2026}", "\u{2026}");
2267 t!("\u{2126}", "\u{3a9}");
2268 t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
2269 t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
2270 t!("a\u{301}", "a\u{301}");
2271 t!("\u{301}a", "\u{301}a");
2272 t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
2273 t!("\u{ac1c}", "\u{1100}\u{1162}");
2277 fn test_nfkd_chars() {
2279 ($input: expr, $expected: expr) => {
2280 assert_eq!($input.nfkd_chars().collect::<String>(), $expected);
2284 t!("\u{1e0b}\u{1c4}", "d\u{307}DZ\u{30c}");
2285 t!("\u{2026}", "...");
2286 t!("\u{2126}", "\u{3a9}");
2287 t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
2288 t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
2289 t!("a\u{301}", "a\u{301}");
2290 t!("\u{301}a", "\u{301}a");
2291 t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
2292 t!("\u{ac1c}", "\u{1100}\u{1162}");
2296 fn test_nfc_chars() {
2298 ($input: expr, $expected: expr) => {
2299 assert_eq!($input.nfc_chars().collect::<String>(), $expected);
2303 t!("\u{1e0b}\u{1c4}", "\u{1e0b}\u{1c4}");
2304 t!("\u{2026}", "\u{2026}");
2305 t!("\u{2126}", "\u{3a9}");
2306 t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
2307 t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
2308 t!("a\u{301}", "\u{e1}");
2309 t!("\u{301}a", "\u{301}a");
2310 t!("\u{d4db}", "\u{d4db}");
2311 t!("\u{ac1c}", "\u{ac1c}");
2312 t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
2316 fn test_nfkc_chars() {
2318 ($input: expr, $expected: expr) => {
2319 assert_eq!($input.nfkc_chars().collect::<String>(), $expected);
2323 t!("\u{1e0b}\u{1c4}", "\u{1e0b}D\u{17d}");
2324 t!("\u{2026}", "...");
2325 t!("\u{2126}", "\u{3a9}");
2326 t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
2327 t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
2328 t!("a\u{301}", "\u{e1}");
2329 t!("\u{301}a", "\u{301}a");
2330 t!("\u{d4db}", "\u{d4db}");
2331 t!("\u{ac1c}", "\u{ac1c}");
2332 t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
2337 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
2338 let lines: Vec<&str> = data.lines().collect();
2339 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2341 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
2342 let lines: Vec<&str> = data.lines().collect();
2343 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2347 fn test_graphemes() {
2348 use core::iter::order;
2349 // official Unicode test data
2350 // from http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
2351 let test_same: [(_, &[_]); 325] = [
2352 ("\u{20}\u{20}", &["\u{20}", "\u{20}"]),
2353 ("\u{20}\u{308}\u{20}", &["\u{20}\u{308}", "\u{20}"]),
2354 ("\u{20}\u{D}", &["\u{20}", "\u{D}"]),
2355 ("\u{20}\u{308}\u{D}", &["\u{20}\u{308}", "\u{D}"]),
2356 ("\u{20}\u{A}", &["\u{20}", "\u{A}"]),
2357 ("\u{20}\u{308}\u{A}", &["\u{20}\u{308}", "\u{A}"]),
2358 ("\u{20}\u{1}", &["\u{20}", "\u{1}"]),
2359 ("\u{20}\u{308}\u{1}", &["\u{20}\u{308}", "\u{1}"]),
2360 ("\u{20}\u{300}", &["\u{20}\u{300}"]),
2361 ("\u{20}\u{308}\u{300}", &["\u{20}\u{308}\u{300}"]),
2362 ("\u{20}\u{1100}", &["\u{20}", "\u{1100}"]),
2363 ("\u{20}\u{308}\u{1100}", &["\u{20}\u{308}", "\u{1100}"]),
2364 ("\u{20}\u{1160}", &["\u{20}", "\u{1160}"]),
2365 ("\u{20}\u{308}\u{1160}", &["\u{20}\u{308}", "\u{1160}"]),
2366 ("\u{20}\u{11A8}", &["\u{20}", "\u{11A8}"]),
2367 ("\u{20}\u{308}\u{11A8}", &["\u{20}\u{308}", "\u{11A8}"]),
2368 ("\u{20}\u{AC00}", &["\u{20}", "\u{AC00}"]),
2369 ("\u{20}\u{308}\u{AC00}", &["\u{20}\u{308}", "\u{AC00}"]),
2370 ("\u{20}\u{AC01}", &["\u{20}", "\u{AC01}"]),
2371 ("\u{20}\u{308}\u{AC01}", &["\u{20}\u{308}", "\u{AC01}"]),
2372 ("\u{20}\u{1F1E6}", &["\u{20}", "\u{1F1E6}"]),
2373 ("\u{20}\u{308}\u{1F1E6}", &["\u{20}\u{308}", "\u{1F1E6}"]),
2374 ("\u{20}\u{378}", &["\u{20}", "\u{378}"]),
2375 ("\u{20}\u{308}\u{378}", &["\u{20}\u{308}", "\u{378}"]),
2376 ("\u{D}\u{20}", &["\u{D}", "\u{20}"]),
2377 ("\u{D}\u{308}\u{20}", &["\u{D}", "\u{308}", "\u{20}"]),
2378 ("\u{D}\u{D}", &["\u{D}", "\u{D}"]),
2379 ("\u{D}\u{308}\u{D}", &["\u{D}", "\u{308}", "\u{D}"]),
2380 ("\u{D}\u{A}", &["\u{D}\u{A}"]),
2381 ("\u{D}\u{308}\u{A}", &["\u{D}", "\u{308}", "\u{A}"]),
2382 ("\u{D}\u{1}", &["\u{D}", "\u{1}"]),
2383 ("\u{D}\u{308}\u{1}", &["\u{D}", "\u{308}", "\u{1}"]),
2384 ("\u{D}\u{300}", &["\u{D}", "\u{300}"]),
2385 ("\u{D}\u{308}\u{300}", &["\u{D}", "\u{308}\u{300}"]),
2386 ("\u{D}\u{903}", &["\u{D}", "\u{903}"]),
2387 ("\u{D}\u{1100}", &["\u{D}", "\u{1100}"]),
2388 ("\u{D}\u{308}\u{1100}", &["\u{D}", "\u{308}", "\u{1100}"]),
2389 ("\u{D}\u{1160}", &["\u{D}", "\u{1160}"]),
2390 ("\u{D}\u{308}\u{1160}", &["\u{D}", "\u{308}", "\u{1160}"]),
2391 ("\u{D}\u{11A8}", &["\u{D}", "\u{11A8}"]),
2392 ("\u{D}\u{308}\u{11A8}", &["\u{D}", "\u{308}", "\u{11A8}"]),
2393 ("\u{D}\u{AC00}", &["\u{D}", "\u{AC00}"]),
2394 ("\u{D}\u{308}\u{AC00}", &["\u{D}", "\u{308}", "\u{AC00}"]),
2395 ("\u{D}\u{AC01}", &["\u{D}", "\u{AC01}"]),
2396 ("\u{D}\u{308}\u{AC01}", &["\u{D}", "\u{308}", "\u{AC01}"]),
2397 ("\u{D}\u{1F1E6}", &["\u{D}", "\u{1F1E6}"]),
2398 ("\u{D}\u{308}\u{1F1E6}", &["\u{D}", "\u{308}", "\u{1F1E6}"]),
2399 ("\u{D}\u{378}", &["\u{D}", "\u{378}"]),
2400 ("\u{D}\u{308}\u{378}", &["\u{D}", "\u{308}", "\u{378}"]),
2401 ("\u{A}\u{20}", &["\u{A}", "\u{20}"]),
2402 ("\u{A}\u{308}\u{20}", &["\u{A}", "\u{308}", "\u{20}"]),
2403 ("\u{A}\u{D}", &["\u{A}", "\u{D}"]),
2404 ("\u{A}\u{308}\u{D}", &["\u{A}", "\u{308}", "\u{D}"]),
2405 ("\u{A}\u{A}", &["\u{A}", "\u{A}"]),
2406 ("\u{A}\u{308}\u{A}", &["\u{A}", "\u{308}", "\u{A}"]),
2407 ("\u{A}\u{1}", &["\u{A}", "\u{1}"]),
2408 ("\u{A}\u{308}\u{1}", &["\u{A}", "\u{308}", "\u{1}"]),
2409 ("\u{A}\u{300}", &["\u{A}", "\u{300}"]),
2410 ("\u{A}\u{308}\u{300}", &["\u{A}", "\u{308}\u{300}"]),
2411 ("\u{A}\u{903}", &["\u{A}", "\u{903}"]),
2412 ("\u{A}\u{1100}", &["\u{A}", "\u{1100}"]),
2413 ("\u{A}\u{308}\u{1100}", &["\u{A}", "\u{308}", "\u{1100}"]),
2414 ("\u{A}\u{1160}", &["\u{A}", "\u{1160}"]),
2415 ("\u{A}\u{308}\u{1160}", &["\u{A}", "\u{308}", "\u{1160}"]),
2416 ("\u{A}\u{11A8}", &["\u{A}", "\u{11A8}"]),
2417 ("\u{A}\u{308}\u{11A8}", &["\u{A}", "\u{308}", "\u{11A8}"]),
2418 ("\u{A}\u{AC00}", &["\u{A}", "\u{AC00}"]),
2419 ("\u{A}\u{308}\u{AC00}", &["\u{A}", "\u{308}", "\u{AC00}"]),
2420 ("\u{A}\u{AC01}", &["\u{A}", "\u{AC01}"]),
2421 ("\u{A}\u{308}\u{AC01}", &["\u{A}", "\u{308}", "\u{AC01}"]),
2422 ("\u{A}\u{1F1E6}", &["\u{A}", "\u{1F1E6}"]),
2423 ("\u{A}\u{308}\u{1F1E6}", &["\u{A}", "\u{308}", "\u{1F1E6}"]),
2424 ("\u{A}\u{378}", &["\u{A}", "\u{378}"]),
2425 ("\u{A}\u{308}\u{378}", &["\u{A}", "\u{308}", "\u{378}"]),
2426 ("\u{1}\u{20}", &["\u{1}", "\u{20}"]),
2427 ("\u{1}\u{308}\u{20}", &["\u{1}", "\u{308}", "\u{20}"]),
2428 ("\u{1}\u{D}", &["\u{1}", "\u{D}"]),
2429 ("\u{1}\u{308}\u{D}", &["\u{1}", "\u{308}", "\u{D}"]),
2430 ("\u{1}\u{A}", &["\u{1}", "\u{A}"]),
2431 ("\u{1}\u{308}\u{A}", &["\u{1}", "\u{308}", "\u{A}"]),
2432 ("\u{1}\u{1}", &["\u{1}", "\u{1}"]),
2433 ("\u{1}\u{308}\u{1}", &["\u{1}", "\u{308}", "\u{1}"]),
2434 ("\u{1}\u{300}", &["\u{1}", "\u{300}"]),
2435 ("\u{1}\u{308}\u{300}", &["\u{1}", "\u{308}\u{300}"]),
2436 ("\u{1}\u{903}", &["\u{1}", "\u{903}"]),
2437 ("\u{1}\u{1100}", &["\u{1}", "\u{1100}"]),
2438 ("\u{1}\u{308}\u{1100}", &["\u{1}", "\u{308}", "\u{1100}"]),
2439 ("\u{1}\u{1160}", &["\u{1}", "\u{1160}"]),
2440 ("\u{1}\u{308}\u{1160}", &["\u{1}", "\u{308}", "\u{1160}"]),
2441 ("\u{1}\u{11A8}", &["\u{1}", "\u{11A8}"]),
2442 ("\u{1}\u{308}\u{11A8}", &["\u{1}", "\u{308}", "\u{11A8}"]),
2443 ("\u{1}\u{AC00}", &["\u{1}", "\u{AC00}"]),
2444 ("\u{1}\u{308}\u{AC00}", &["\u{1}", "\u{308}", "\u{AC00}"]),
2445 ("\u{1}\u{AC01}", &["\u{1}", "\u{AC01}"]),
2446 ("\u{1}\u{308}\u{AC01}", &["\u{1}", "\u{308}", "\u{AC01}"]),
2447 ("\u{1}\u{1F1E6}", &["\u{1}", "\u{1F1E6}"]),
2448 ("\u{1}\u{308}\u{1F1E6}", &["\u{1}", "\u{308}", "\u{1F1E6}"]),
2449 ("\u{1}\u{378}", &["\u{1}", "\u{378}"]),
2450 ("\u{1}\u{308}\u{378}", &["\u{1}", "\u{308}", "\u{378}"]),
2451 ("\u{300}\u{20}", &["\u{300}", "\u{20}"]),
2452 ("\u{300}\u{308}\u{20}", &["\u{300}\u{308}", "\u{20}"]),
2453 ("\u{300}\u{D}", &["\u{300}", "\u{D}"]),
2454 ("\u{300}\u{308}\u{D}", &["\u{300}\u{308}", "\u{D}"]),
2455 ("\u{300}\u{A}", &["\u{300}", "\u{A}"]),
2456 ("\u{300}\u{308}\u{A}", &["\u{300}\u{308}", "\u{A}"]),
2457 ("\u{300}\u{1}", &["\u{300}", "\u{1}"]),
2458 ("\u{300}\u{308}\u{1}", &["\u{300}\u{308}", "\u{1}"]),
2459 ("\u{300}\u{300}", &["\u{300}\u{300}"]),
2460 ("\u{300}\u{308}\u{300}", &["\u{300}\u{308}\u{300}"]),
2461 ("\u{300}\u{1100}", &["\u{300}", "\u{1100}"]),
2462 ("\u{300}\u{308}\u{1100}", &["\u{300}\u{308}", "\u{1100}"]),
2463 ("\u{300}\u{1160}", &["\u{300}", "\u{1160}"]),
2464 ("\u{300}\u{308}\u{1160}", &["\u{300}\u{308}", "\u{1160}"]),
2465 ("\u{300}\u{11A8}", &["\u{300}", "\u{11A8}"]),
2466 ("\u{300}\u{308}\u{11A8}", &["\u{300}\u{308}", "\u{11A8}"]),
2467 ("\u{300}\u{AC00}", &["\u{300}", "\u{AC00}"]),
2468 ("\u{300}\u{308}\u{AC00}", &["\u{300}\u{308}", "\u{AC00}"]),
2469 ("\u{300}\u{AC01}", &["\u{300}", "\u{AC01}"]),
2470 ("\u{300}\u{308}\u{AC01}", &["\u{300}\u{308}", "\u{AC01}"]),
2471 ("\u{300}\u{1F1E6}", &["\u{300}", "\u{1F1E6}"]),
2472 ("\u{300}\u{308}\u{1F1E6}", &["\u{300}\u{308}", "\u{1F1E6}"]),
2473 ("\u{300}\u{378}", &["\u{300}", "\u{378}"]),
2474 ("\u{300}\u{308}\u{378}", &["\u{300}\u{308}", "\u{378}"]),
2475 ("\u{903}\u{20}", &["\u{903}", "\u{20}"]),
2476 ("\u{903}\u{308}\u{20}", &["\u{903}\u{308}", "\u{20}"]),
2477 ("\u{903}\u{D}", &["\u{903}", "\u{D}"]),
2478 ("\u{903}\u{308}\u{D}", &["\u{903}\u{308}", "\u{D}"]),
2479 ("\u{903}\u{A}", &["\u{903}", "\u{A}"]),
2480 ("\u{903}\u{308}\u{A}", &["\u{903}\u{308}", "\u{A}"]),
2481 ("\u{903}\u{1}", &["\u{903}", "\u{1}"]),
2482 ("\u{903}\u{308}\u{1}", &["\u{903}\u{308}", "\u{1}"]),
2483 ("\u{903}\u{300}", &["\u{903}\u{300}"]),
2484 ("\u{903}\u{308}\u{300}", &["\u{903}\u{308}\u{300}"]),
2485 ("\u{903}\u{1100}", &["\u{903}", "\u{1100}"]),
2486 ("\u{903}\u{308}\u{1100}", &["\u{903}\u{308}", "\u{1100}"]),
2487 ("\u{903}\u{1160}", &["\u{903}", "\u{1160}"]),
2488 ("\u{903}\u{308}\u{1160}", &["\u{903}\u{308}", "\u{1160}"]),
2489 ("\u{903}\u{11A8}", &["\u{903}", "\u{11A8}"]),
2490 ("\u{903}\u{308}\u{11A8}", &["\u{903}\u{308}", "\u{11A8}"]),
2491 ("\u{903}\u{AC00}", &["\u{903}", "\u{AC00}"]),
2492 ("\u{903}\u{308}\u{AC00}", &["\u{903}\u{308}", "\u{AC00}"]),
2493 ("\u{903}\u{AC01}", &["\u{903}", "\u{AC01}"]),
2494 ("\u{903}\u{308}\u{AC01}", &["\u{903}\u{308}", "\u{AC01}"]),
2495 ("\u{903}\u{1F1E6}", &["\u{903}", "\u{1F1E6}"]),
2496 ("\u{903}\u{308}\u{1F1E6}", &["\u{903}\u{308}", "\u{1F1E6}"]),
2497 ("\u{903}\u{378}", &["\u{903}", "\u{378}"]),
2498 ("\u{903}\u{308}\u{378}", &["\u{903}\u{308}", "\u{378}"]),
2499 ("\u{1100}\u{20}", &["\u{1100}", "\u{20}"]),
2500 ("\u{1100}\u{308}\u{20}", &["\u{1100}\u{308}", "\u{20}"]),
2501 ("\u{1100}\u{D}", &["\u{1100}", "\u{D}"]),
2502 ("\u{1100}\u{308}\u{D}", &["\u{1100}\u{308}", "\u{D}"]),
2503 ("\u{1100}\u{A}", &["\u{1100}", "\u{A}"]),
2504 ("\u{1100}\u{308}\u{A}", &["\u{1100}\u{308}", "\u{A}"]),
2505 ("\u{1100}\u{1}", &["\u{1100}", "\u{1}"]),
2506 ("\u{1100}\u{308}\u{1}", &["\u{1100}\u{308}", "\u{1}"]),
2507 ("\u{1100}\u{300}", &["\u{1100}\u{300}"]),
2508 ("\u{1100}\u{308}\u{300}", &["\u{1100}\u{308}\u{300}"]),
2509 ("\u{1100}\u{1100}", &["\u{1100}\u{1100}"]),
2510 ("\u{1100}\u{308}\u{1100}", &["\u{1100}\u{308}", "\u{1100}"]),
2511 ("\u{1100}\u{1160}", &["\u{1100}\u{1160}"]),
2512 ("\u{1100}\u{308}\u{1160}", &["\u{1100}\u{308}", "\u{1160}"]),
2513 ("\u{1100}\u{11A8}", &["\u{1100}", "\u{11A8}"]),
2514 ("\u{1100}\u{308}\u{11A8}", &["\u{1100}\u{308}", "\u{11A8}"]),
2515 ("\u{1100}\u{AC00}", &["\u{1100}\u{AC00}"]),
2516 ("\u{1100}\u{308}\u{AC00}", &["\u{1100}\u{308}", "\u{AC00}"]),
2517 ("\u{1100}\u{AC01}", &["\u{1100}\u{AC01}"]),
2518 ("\u{1100}\u{308}\u{AC01}", &["\u{1100}\u{308}", "\u{AC01}"]),
2519 ("\u{1100}\u{1F1E6}", &["\u{1100}", "\u{1F1E6}"]),
2520 ("\u{1100}\u{308}\u{1F1E6}", &["\u{1100}\u{308}", "\u{1F1E6}"]),
2521 ("\u{1100}\u{378}", &["\u{1100}", "\u{378}"]),
2522 ("\u{1100}\u{308}\u{378}", &["\u{1100}\u{308}", "\u{378}"]),
2523 ("\u{1160}\u{20}", &["\u{1160}", "\u{20}"]),
2524 ("\u{1160}\u{308}\u{20}", &["\u{1160}\u{308}", "\u{20}"]),
2525 ("\u{1160}\u{D}", &["\u{1160}", "\u{D}"]),
2526 ("\u{1160}\u{308}\u{D}", &["\u{1160}\u{308}", "\u{D}"]),
2527 ("\u{1160}\u{A}", &["\u{1160}", "\u{A}"]),
2528 ("\u{1160}\u{308}\u{A}", &["\u{1160}\u{308}", "\u{A}"]),
2529 ("\u{1160}\u{1}", &["\u{1160}", "\u{1}"]),
2530 ("\u{1160}\u{308}\u{1}", &["\u{1160}\u{308}", "\u{1}"]),
2531 ("\u{1160}\u{300}", &["\u{1160}\u{300}"]),
2532 ("\u{1160}\u{308}\u{300}", &["\u{1160}\u{308}\u{300}"]),
2533 ("\u{1160}\u{1100}", &["\u{1160}", "\u{1100}"]),
2534 ("\u{1160}\u{308}\u{1100}", &["\u{1160}\u{308}", "\u{1100}"]),
2535 ("\u{1160}\u{1160}", &["\u{1160}\u{1160}"]),
2536 ("\u{1160}\u{308}\u{1160}", &["\u{1160}\u{308}", "\u{1160}"]),
2537 ("\u{1160}\u{11A8}", &["\u{1160}\u{11A8}"]),
2538 ("\u{1160}\u{308}\u{11A8}", &["\u{1160}\u{308}", "\u{11A8}"]),
2539 ("\u{1160}\u{AC00}", &["\u{1160}", "\u{AC00}"]),
2540 ("\u{1160}\u{308}\u{AC00}", &["\u{1160}\u{308}", "\u{AC00}"]),
2541 ("\u{1160}\u{AC01}", &["\u{1160}", "\u{AC01}"]),
2542 ("\u{1160}\u{308}\u{AC01}", &["\u{1160}\u{308}", "\u{AC01}"]),
2543 ("\u{1160}\u{1F1E6}", &["\u{1160}", "\u{1F1E6}"]),
2544 ("\u{1160}\u{308}\u{1F1E6}", &["\u{1160}\u{308}", "\u{1F1E6}"]),
2545 ("\u{1160}\u{378}", &["\u{1160}", "\u{378}"]),
2546 ("\u{1160}\u{308}\u{378}", &["\u{1160}\u{308}", "\u{378}"]),
2547 ("\u{11A8}\u{20}", &["\u{11A8}", "\u{20}"]),
2548 ("\u{11A8}\u{308}\u{20}", &["\u{11A8}\u{308}", "\u{20}"]),
2549 ("\u{11A8}\u{D}", &["\u{11A8}", "\u{D}"]),
2550 ("\u{11A8}\u{308}\u{D}", &["\u{11A8}\u{308}", "\u{D}"]),
2551 ("\u{11A8}\u{A}", &["\u{11A8}", "\u{A}"]),
2552 ("\u{11A8}\u{308}\u{A}", &["\u{11A8}\u{308}", "\u{A}"]),
2553 ("\u{11A8}\u{1}", &["\u{11A8}", "\u{1}"]),
2554 ("\u{11A8}\u{308}\u{1}", &["\u{11A8}\u{308}", "\u{1}"]),
2555 ("\u{11A8}\u{300}", &["\u{11A8}\u{300}"]),
2556 ("\u{11A8}\u{308}\u{300}", &["\u{11A8}\u{308}\u{300}"]),
2557 ("\u{11A8}\u{1100}", &["\u{11A8}", "\u{1100}"]),
2558 ("\u{11A8}\u{308}\u{1100}", &["\u{11A8}\u{308}", "\u{1100}"]),
2559 ("\u{11A8}\u{1160}", &["\u{11A8}", "\u{1160}"]),
2560 ("\u{11A8}\u{308}\u{1160}", &["\u{11A8}\u{308}", "\u{1160}"]),
2561 ("\u{11A8}\u{11A8}", &["\u{11A8}\u{11A8}"]),
2562 ("\u{11A8}\u{308}\u{11A8}", &["\u{11A8}\u{308}", "\u{11A8}"]),
2563 ("\u{11A8}\u{AC00}", &["\u{11A8}", "\u{AC00}"]),
2564 ("\u{11A8}\u{308}\u{AC00}", &["\u{11A8}\u{308}", "\u{AC00}"]),
2565 ("\u{11A8}\u{AC01}", &["\u{11A8}", "\u{AC01}"]),
2566 ("\u{11A8}\u{308}\u{AC01}", &["\u{11A8}\u{308}", "\u{AC01}"]),
2567 ("\u{11A8}\u{1F1E6}", &["\u{11A8}", "\u{1F1E6}"]),
2568 ("\u{11A8}\u{308}\u{1F1E6}", &["\u{11A8}\u{308}", "\u{1F1E6}"]),
2569 ("\u{11A8}\u{378}", &["\u{11A8}", "\u{378}"]),
2570 ("\u{11A8}\u{308}\u{378}", &["\u{11A8}\u{308}", "\u{378}"]),
2571 ("\u{AC00}\u{20}", &["\u{AC00}", "\u{20}"]),
2572 ("\u{AC00}\u{308}\u{20}", &["\u{AC00}\u{308}", "\u{20}"]),
2573 ("\u{AC00}\u{D}", &["\u{AC00}", "\u{D}"]),
2574 ("\u{AC00}\u{308}\u{D}", &["\u{AC00}\u{308}", "\u{D}"]),
2575 ("\u{AC00}\u{A}", &["\u{AC00}", "\u{A}"]),
2576 ("\u{AC00}\u{308}\u{A}", &["\u{AC00}\u{308}", "\u{A}"]),
2577 ("\u{AC00}\u{1}", &["\u{AC00}", "\u{1}"]),
2578 ("\u{AC00}\u{308}\u{1}", &["\u{AC00}\u{308}", "\u{1}"]),
2579 ("\u{AC00}\u{300}", &["\u{AC00}\u{300}"]),
2580 ("\u{AC00}\u{308}\u{300}", &["\u{AC00}\u{308}\u{300}"]),
2581 ("\u{AC00}\u{1100}", &["\u{AC00}", "\u{1100}"]),
2582 ("\u{AC00}\u{308}\u{1100}", &["\u{AC00}\u{308}", "\u{1100}"]),
2583 ("\u{AC00}\u{1160}", &["\u{AC00}\u{1160}"]),
2584 ("\u{AC00}\u{308}\u{1160}", &["\u{AC00}\u{308}", "\u{1160}"]),
2585 ("\u{AC00}\u{11A8}", &["\u{AC00}\u{11A8}"]),
2586 ("\u{AC00}\u{308}\u{11A8}", &["\u{AC00}\u{308}", "\u{11A8}"]),
2587 ("\u{AC00}\u{AC00}", &["\u{AC00}", "\u{AC00}"]),
2588 ("\u{AC00}\u{308}\u{AC00}", &["\u{AC00}\u{308}", "\u{AC00}"]),
2589 ("\u{AC00}\u{AC01}", &["\u{AC00}", "\u{AC01}"]),
2590 ("\u{AC00}\u{308}\u{AC01}", &["\u{AC00}\u{308}", "\u{AC01}"]),
2591 ("\u{AC00}\u{1F1E6}", &["\u{AC00}", "\u{1F1E6}"]),
2592 ("\u{AC00}\u{308}\u{1F1E6}", &["\u{AC00}\u{308}", "\u{1F1E6}"]),
2593 ("\u{AC00}\u{378}", &["\u{AC00}", "\u{378}"]),
2594 ("\u{AC00}\u{308}\u{378}", &["\u{AC00}\u{308}", "\u{378}"]),
2595 ("\u{AC01}\u{20}", &["\u{AC01}", "\u{20}"]),
2596 ("\u{AC01}\u{308}\u{20}", &["\u{AC01}\u{308}", "\u{20}"]),
2597 ("\u{AC01}\u{D}", &["\u{AC01}", "\u{D}"]),
2598 ("\u{AC01}\u{308}\u{D}", &["\u{AC01}\u{308}", "\u{D}"]),
2599 ("\u{AC01}\u{A}", &["\u{AC01}", "\u{A}"]),
2600 ("\u{AC01}\u{308}\u{A}", &["\u{AC01}\u{308}", "\u{A}"]),
2601 ("\u{AC01}\u{1}", &["\u{AC01}", "\u{1}"]),
2602 ("\u{AC01}\u{308}\u{1}", &["\u{AC01}\u{308}", "\u{1}"]),
2603 ("\u{AC01}\u{300}", &["\u{AC01}\u{300}"]),
2604 ("\u{AC01}\u{308}\u{300}", &["\u{AC01}\u{308}\u{300}"]),
2605 ("\u{AC01}\u{1100}", &["\u{AC01}", "\u{1100}"]),
2606 ("\u{AC01}\u{308}\u{1100}", &["\u{AC01}\u{308}", "\u{1100}"]),
2607 ("\u{AC01}\u{1160}", &["\u{AC01}", "\u{1160}"]),
2608 ("\u{AC01}\u{308}\u{1160}", &["\u{AC01}\u{308}", "\u{1160}"]),
2609 ("\u{AC01}\u{11A8}", &["\u{AC01}\u{11A8}"]),
2610 ("\u{AC01}\u{308}\u{11A8}", &["\u{AC01}\u{308}", "\u{11A8}"]),
2611 ("\u{AC01}\u{AC00}", &["\u{AC01}", "\u{AC00}"]),
2612 ("\u{AC01}\u{308}\u{AC00}", &["\u{AC01}\u{308}", "\u{AC00}"]),
2613 ("\u{AC01}\u{AC01}", &["\u{AC01}", "\u{AC01}"]),
2614 ("\u{AC01}\u{308}\u{AC01}", &["\u{AC01}\u{308}", "\u{AC01}"]),
2615 ("\u{AC01}\u{1F1E6}", &["\u{AC01}", "\u{1F1E6}"]),
2616 ("\u{AC01}\u{308}\u{1F1E6}", &["\u{AC01}\u{308}", "\u{1F1E6}"]),
2617 ("\u{AC01}\u{378}", &["\u{AC01}", "\u{378}"]),
2618 ("\u{AC01}\u{308}\u{378}", &["\u{AC01}\u{308}", "\u{378}"]),
2619 ("\u{1F1E6}\u{20}", &["\u{1F1E6}", "\u{20}"]),
2620 ("\u{1F1E6}\u{308}\u{20}", &["\u{1F1E6}\u{308}", "\u{20}"]),
2621 ("\u{1F1E6}\u{D}", &["\u{1F1E6}", "\u{D}"]),
2622 ("\u{1F1E6}\u{308}\u{D}", &["\u{1F1E6}\u{308}", "\u{D}"]),
2623 ("\u{1F1E6}\u{A}", &["\u{1F1E6}", "\u{A}"]),
2624 ("\u{1F1E6}\u{308}\u{A}", &["\u{1F1E6}\u{308}", "\u{A}"]),
2625 ("\u{1F1E6}\u{1}", &["\u{1F1E6}", "\u{1}"]),
2626 ("\u{1F1E6}\u{308}\u{1}", &["\u{1F1E6}\u{308}", "\u{1}"]),
2627 ("\u{1F1E6}\u{300}", &["\u{1F1E6}\u{300}"]),
2628 ("\u{1F1E6}\u{308}\u{300}", &["\u{1F1E6}\u{308}\u{300}"]),
2629 ("\u{1F1E6}\u{1100}", &["\u{1F1E6}", "\u{1100}"]),
2630 ("\u{1F1E6}\u{308}\u{1100}", &["\u{1F1E6}\u{308}", "\u{1100}"]),
2631 ("\u{1F1E6}\u{1160}", &["\u{1F1E6}", "\u{1160}"]),
2632 ("\u{1F1E6}\u{308}\u{1160}", &["\u{1F1E6}\u{308}", "\u{1160}"]),
2633 ("\u{1F1E6}\u{11A8}", &["\u{1F1E6}", "\u{11A8}"]),
2634 ("\u{1F1E6}\u{308}\u{11A8}", &["\u{1F1E6}\u{308}", "\u{11A8}"]),
2635 ("\u{1F1E6}\u{AC00}", &["\u{1F1E6}", "\u{AC00}"]),
2636 ("\u{1F1E6}\u{308}\u{AC00}", &["\u{1F1E6}\u{308}", "\u{AC00}"]),
2637 ("\u{1F1E6}\u{AC01}", &["\u{1F1E6}", "\u{AC01}"]),
2638 ("\u{1F1E6}\u{308}\u{AC01}", &["\u{1F1E6}\u{308}", "\u{AC01}"]),
2639 ("\u{1F1E6}\u{1F1E6}", &["\u{1F1E6}\u{1F1E6}"]),
2640 ("\u{1F1E6}\u{308}\u{1F1E6}", &["\u{1F1E6}\u{308}", "\u{1F1E6}"]),
2641 ("\u{1F1E6}\u{378}", &["\u{1F1E6}", "\u{378}"]),
2642 ("\u{1F1E6}\u{308}\u{378}", &["\u{1F1E6}\u{308}", "\u{378}"]),
2643 ("\u{378}\u{20}", &["\u{378}", "\u{20}"]),
2644 ("\u{378}\u{308}\u{20}", &["\u{378}\u{308}", "\u{20}"]),
2645 ("\u{378}\u{D}", &["\u{378}", "\u{D}"]),
2646 ("\u{378}\u{308}\u{D}", &["\u{378}\u{308}", "\u{D}"]),
2647 ("\u{378}\u{A}", &["\u{378}", "\u{A}"]),
2648 ("\u{378}\u{308}\u{A}", &["\u{378}\u{308}", "\u{A}"]),
2649 ("\u{378}\u{1}", &["\u{378}", "\u{1}"]),
2650 ("\u{378}\u{308}\u{1}", &["\u{378}\u{308}", "\u{1}"]),
2651 ("\u{378}\u{300}", &["\u{378}\u{300}"]),
2652 ("\u{378}\u{308}\u{300}", &["\u{378}\u{308}\u{300}"]),
2653 ("\u{378}\u{1100}", &["\u{378}", "\u{1100}"]),
2654 ("\u{378}\u{308}\u{1100}", &["\u{378}\u{308}", "\u{1100}"]),
2655 ("\u{378}\u{1160}", &["\u{378}", "\u{1160}"]),
2656 ("\u{378}\u{308}\u{1160}", &["\u{378}\u{308}", "\u{1160}"]),
2657 ("\u{378}\u{11A8}", &["\u{378}", "\u{11A8}"]),
2658 ("\u{378}\u{308}\u{11A8}", &["\u{378}\u{308}", "\u{11A8}"]),
2659 ("\u{378}\u{AC00}", &["\u{378}", "\u{AC00}"]),
2660 ("\u{378}\u{308}\u{AC00}", &["\u{378}\u{308}", "\u{AC00}"]),
2661 ("\u{378}\u{AC01}", &["\u{378}", "\u{AC01}"]),
2662 ("\u{378}\u{308}\u{AC01}", &["\u{378}\u{308}", "\u{AC01}"]),
2663 ("\u{378}\u{1F1E6}", &["\u{378}", "\u{1F1E6}"]),
2664 ("\u{378}\u{308}\u{1F1E6}", &["\u{378}\u{308}", "\u{1F1E6}"]),
2665 ("\u{378}\u{378}", &["\u{378}", "\u{378}"]),
2666 ("\u{378}\u{308}\u{378}", &["\u{378}\u{308}", "\u{378}"]),
2667 ("\u{61}\u{1F1E6}\u{62}", &["\u{61}", "\u{1F1E6}", "\u{62}"]),
2668 ("\u{1F1F7}\u{1F1FA}", &["\u{1F1F7}\u{1F1FA}"]),
2669 ("\u{1F1F7}\u{1F1FA}\u{1F1F8}", &["\u{1F1F7}\u{1F1FA}\u{1F1F8}"]),
2670 ("\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}",
2671 &["\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}"]),
2672 ("\u{1F1F7}\u{1F1FA}\u{200B}\u{1F1F8}\u{1F1EA}",
2673 &["\u{1F1F7}\u{1F1FA}", "\u{200B}", "\u{1F1F8}\u{1F1EA}"]),
2674 ("\u{1F1E6}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{1F1E7}\u{1F1E8}"]),
2675 ("\u{1F1E6}\u{200D}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{200D}",
2676 "\u{1F1E7}\u{1F1E8}"]),
2677 ("\u{1F1E6}\u{1F1E7}\u{200D}\u{1F1E8}",
2678 &["\u{1F1E6}\u{1F1E7}\u{200D}", "\u{1F1E8}"]),
2679 ("\u{20}\u{200D}\u{646}", &["\u{20}\u{200D}", "\u{646}"]),
2680 ("\u{646}\u{200D}\u{20}", &["\u{646}\u{200D}", "\u{20}"]),
2683 let test_diff: [(_, &[_], &[_]); 23] = [
2684 ("\u{20}\u{903}", &["\u{20}\u{903}"], &["\u{20}", "\u{903}"]), ("\u{20}\u{308}\u{903}",
2685 &["\u{20}\u{308}\u{903}"], &["\u{20}\u{308}", "\u{903}"]), ("\u{D}\u{308}\u{903}",
2686 &["\u{D}", "\u{308}\u{903}"], &["\u{D}", "\u{308}", "\u{903}"]), ("\u{A}\u{308}\u{903}",
2687 &["\u{A}", "\u{308}\u{903}"], &["\u{A}", "\u{308}", "\u{903}"]), ("\u{1}\u{308}\u{903}",
2688 &["\u{1}", "\u{308}\u{903}"], &["\u{1}", "\u{308}", "\u{903}"]), ("\u{300}\u{903}",
2689 &["\u{300}\u{903}"], &["\u{300}", "\u{903}"]), ("\u{300}\u{308}\u{903}",
2690 &["\u{300}\u{308}\u{903}"], &["\u{300}\u{308}", "\u{903}"]), ("\u{903}\u{903}",
2691 &["\u{903}\u{903}"], &["\u{903}", "\u{903}"]), ("\u{903}\u{308}\u{903}",
2692 &["\u{903}\u{308}\u{903}"], &["\u{903}\u{308}", "\u{903}"]), ("\u{1100}\u{903}",
2693 &["\u{1100}\u{903}"], &["\u{1100}", "\u{903}"]), ("\u{1100}\u{308}\u{903}",
2694 &["\u{1100}\u{308}\u{903}"], &["\u{1100}\u{308}", "\u{903}"]), ("\u{1160}\u{903}",
2695 &["\u{1160}\u{903}"], &["\u{1160}", "\u{903}"]), ("\u{1160}\u{308}\u{903}",
2696 &["\u{1160}\u{308}\u{903}"], &["\u{1160}\u{308}", "\u{903}"]), ("\u{11A8}\u{903}",
2697 &["\u{11A8}\u{903}"], &["\u{11A8}", "\u{903}"]), ("\u{11A8}\u{308}\u{903}",
2698 &["\u{11A8}\u{308}\u{903}"], &["\u{11A8}\u{308}", "\u{903}"]), ("\u{AC00}\u{903}",
2699 &["\u{AC00}\u{903}"], &["\u{AC00}", "\u{903}"]), ("\u{AC00}\u{308}\u{903}",
2700 &["\u{AC00}\u{308}\u{903}"], &["\u{AC00}\u{308}", "\u{903}"]), ("\u{AC01}\u{903}",
2701 &["\u{AC01}\u{903}"], &["\u{AC01}", "\u{903}"]), ("\u{AC01}\u{308}\u{903}",
2702 &["\u{AC01}\u{308}\u{903}"], &["\u{AC01}\u{308}", "\u{903}"]), ("\u{1F1E6}\u{903}",
2703 &["\u{1F1E6}\u{903}"], &["\u{1F1E6}", "\u{903}"]), ("\u{1F1E6}\u{308}\u{903}",
2704 &["\u{1F1E6}\u{308}\u{903}"], &["\u{1F1E6}\u{308}", "\u{903}"]), ("\u{378}\u{903}",
2705 &["\u{378}\u{903}"], &["\u{378}", "\u{903}"]), ("\u{378}\u{308}\u{903}",
2706 &["\u{378}\u{308}\u{903}"], &["\u{378}\u{308}", "\u{903}"]),
2709 for &(s, g) in test_same.iter() {
2710 // test forward iterator
2711 assert!(order::equals(s.graphemes(true), g.iter().map(|&x| x)));
2712 assert!(order::equals(s.graphemes(false), g.iter().map(|&x| x)));
2714 // test reverse iterator
2715 assert!(order::equals(s.graphemes(true).rev(), g.iter().rev().map(|&x| x)));
2716 assert!(order::equals(s.graphemes(false).rev(), g.iter().rev().map(|&x| x)));
2719 for &(s, gt, gf) in test_diff.iter() {
2720 // test forward iterator
2721 assert!(order::equals(s.graphemes(true), gt.iter().map(|&x| x)));
2722 assert!(order::equals(s.graphemes(false), gf.iter().map(|&x| x)));
2724 // test reverse iterator
2725 assert!(order::equals(s.graphemes(true).rev(), gt.iter().rev().map(|&x| x)));
2726 assert!(order::equals(s.graphemes(false).rev(), gf.iter().rev().map(|&x| x)));
2729 // test the indices iterators
2730 let s = "a̐éö̲\r\n";
2731 let gr_inds = s.grapheme_indices(true).collect::<Vec<(uint, &str)>>();
2732 let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
2733 assert_eq!(gr_inds, b);
2734 let gr_inds = s.grapheme_indices(true).rev().collect::<Vec<(uint, &str)>>();
2735 let b: &[_] = &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0u, "a̐")];
2736 assert_eq!(gr_inds, b);
2737 let mut gr_inds_iter = s.grapheme_indices(true);
2739 let gr_inds = gr_inds_iter.by_ref();
2740 let e1 = gr_inds.size_hint();
2741 assert_eq!(e1, (1, Some(13)));
2742 let c = gr_inds.count();
2745 let e2 = gr_inds_iter.size_hint();
2746 assert_eq!(e2, (0, Some(0)));
2748 // make sure the reverse iterator does the right thing with "\n" at beginning of string
2750 let gr = s.graphemes(true).rev().collect::<Vec<&str>>();
2751 let b: &[_] = &["\r", "\r\n", "\n"];
// Exercises `split_str` with string separators: a separator that never matches,
// separators at either end of the input, non-ASCII text, and inputs built from
// repeated separator characters.
2756 fn test_split_strator() {
// Helper: collects the pieces produced by `s.split_str(sep)`; `u` carries the expected pieces.
// NOTE(review): the comparison of `v` with `u` is not visible in this view — confirm.
2757 fn t(s: &str, sep: &str, u: &[&str]) {
2758 let v: Vec<&str> = s.split_str(sep).collect();
// "12345" never occurs contiguously in the input, so the whole input is the only expected piece.
2761 t("--1233345--", "12345", &["--1233345--"]);
2762 t("abc::hello::there", "::", &["abc", "hello", "there"]);
// A separator at the start or end of the input is expected to produce an empty piece there.
2763 t("::hello::there", "::", &["", "hello", "there"]);
2764 t("hello::there::", "::", &["hello", "there", ""]);
2765 t("::hello::there::", "::", &["", "hello", "there", ""]);
// Non-ASCII separator inside non-ASCII text.
2766 t("ประเทศไทย中华Việt Nam", "中华", &["ประเทศไทย", "Việt Nam"]);
2767 t("zzXXXzzYYYzz", "zz", &["", "XXX", "YYY", ""]);
2768 t("zzXXXzYYYz", "XXX", &["zz", "zYYYz"]);
2769 t(".XXX.YYY.", ".", &["", "XXX", "YYY", ""]);
// Input equal to the separator gives two empty pieces; a separator that is absent gives the input back.
2771 t("zz", "zz", &["",""]);
2772 t("ok", "z", &["ok"]);
// Overlapping candidates: the expected pieces are consistent with matches being
// consumed left to right without overlap.
2773 t("zzz", "zz", &["","z"]);
2774 t("zzzzz", "zz", &["","","z"]);
// Checks that a string type built through `Default` is the empty string.
2778 fn test_str_default() {
2779 use core::default::Default;
// Generic helper: constructs `S` with `Default::default()` and asserts that its
// `as_slice()` view equals "".
// NOTE(review): the concrete types `t` is instantiated with are not visible in this view — confirm.
2780 fn t<S: Default + Str>() {
2781 let s: S = Default::default();
2782 assert_eq!(s.as_slice(), "");
// Checks that `len()` can be summed over a slice of `&str`, both for string
// literals and for slices borrowed from `String` values.
2790 fn test_str_container() {
// Helper: adds up `len()` of every string slice in `v`.
2791 fn sum_len(v: &[&str]) -> uint {
2792 v.iter().map(|x| x.len()).sum()
2795 let s = String::from_str("01234");
// Every group of pieces below has a combined length of 5; empty pieces contribute 0.
2796 assert_eq!(5, sum_len(&["012", "", "34"]));
2797 assert_eq!(5, sum_len(&[String::from_str("01").as_slice(),
2798 String::from_str("2").as_slice(),
2799 String::from_str("34").as_slice(),
2800 String::from_str("").as_slice()]));
2801 assert_eq!(5, sum_len(&[s.as_slice()]));
// Checks `from_utf8` on byte input that is valid UTF-8 and on input that is not.
2805 fn test_str_from_utf8() {
// NOTE(review): the binding of `xs` used by this first assertion is not visible in
// this view; the assertion expects it to decode to "hello" — confirm.
2807 assert_eq!(from_utf8(xs), Ok("hello"));
// Non-ASCII text taken through `as_bytes()` is expected to decode back to the same string.
2809 let xs = "ศไทย中华Việt Nam".as_bytes();
2810 assert_eq!(from_utf8(xs), Ok("ศไทย中华Việt Nam"));
// Input ending in the byte 0xFF is expected to be rejected with `Utf8Error::TooShort`.
2812 let xs = b"hello\xFF";
2813 assert_eq!(from_utf8(xs), Err(Utf8Error::TooShort));
2820 use prelude::{SliceExt, IteratorExt, SliceConcatExt};
2822 use test::black_box;
// Benchmark: counts the items of `chars()` over a string mixing non-ASCII and ASCII text.
2825 fn char_iterator(b: &mut Bencher) {
2826 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2828 b.iter(|| s.chars().count());
// Benchmark: walks `chars()` with an explicit `for` loop over the same mixed
// non-ASCII/ASCII string, handing every char to `black_box`.
2832 fn char_iterator_for(b: &mut Bencher) {
2833 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2836 for ch in s.chars() { black_box(ch) }
// Benchmark: counts the items of `chars()` over a multi-line, ASCII-only string literal.
2841 fn char_iterator_ascii(b: &mut Bencher) {
2842 let s = "Mary had a little lamb, Little lamb
2843 Mary had a little lamb, Little lamb
2844 Mary had a little lamb, Little lamb
2845 Mary had a little lamb, Little lamb
2846 Mary had a little lamb, Little lamb
2847 Mary had a little lamb, Little lamb";
2849 b.iter(|| s.chars().count());
// Benchmark: counts the items of the reversed `chars()` iterator over a string
// mixing non-ASCII and ASCII text.
2853 fn char_iterator_rev(b: &mut Bencher) {
2854 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2856 b.iter(|| s.chars().rev().count());
// Benchmark: walks the reversed `chars()` iterator with an explicit `for` loop,
// handing every char to `black_box`.
2860 fn char_iterator_rev_for(b: &mut Bencher) {
2861 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2864 for ch in s.chars().rev() { black_box(ch) }
// Benchmark: counts the items of `char_indices()` and asserts the count matches
// the number of chars computed once up front.
2869 fn char_indicesator(b: &mut Bencher) {
2870 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Reference count, computed outside the timed closure.
2871 let len = s.chars().count();
2873 b.iter(|| assert_eq!(s.char_indices().count(), len));
// Benchmark: counts the items of the reversed `char_indices()` iterator and asserts
// the count matches the number of chars computed once up front.
2877 fn char_indicesator_rev(b: &mut Bencher) {
2878 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Reference count, computed outside the timed closure.
2879 let len = s.chars().count();
2881 b.iter(|| assert_eq!(s.char_indices().rev().count(), len));
// Benchmark: splits a mostly non-ASCII string on the ASCII char 'V', which occurs
// twice, and asserts that 3 pieces are produced.
2885 fn split_unicode_ascii(b: &mut Bencher) {
2886 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2888 b.iter(|| assert_eq!(s.split('V').count(), 3));
// Benchmark: splits a mostly non-ASCII string on 'V' through a custom `CharEq`
// implementation instead of a bare `char`, and asserts that 3 pieces are produced.
2892 fn split_unicode_not_ascii(b: &mut Bencher) {
// Wrapper around a `char` whose `CharEq` impl reports `only_ascii() == false`.
// NOTE(review): presumably this keeps `split` off any ASCII-only code path — confirm.
2893 struct NotAscii(char);
2894 impl CharEq for NotAscii {
2895 fn matches(&mut self, c: char) -> bool {
2896 let NotAscii(cc) = *self;
2899 fn only_ascii(&self) -> bool { false }
2901 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2903 b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
// Benchmark: splits an ASCII sentence on the space char and asserts the piece
// count matches a reference count computed once up front.
2908 fn split_ascii(b: &mut Bencher) {
2909 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference count, computed outside the timed closure.
2910 let len = s.split(' ').count();
2912 b.iter(|| assert_eq!(s.split(' ').count(), len));
// Benchmark: splits an ASCII sentence on ' ' through a custom `CharEq` implementation
// and asserts the piece count matches the count obtained with a plain `char`.
2916 fn split_not_ascii(b: &mut Bencher) {
// Wrapper around a `char` whose `CharEq` impl reports `only_ascii() == false`.
// NOTE(review): presumably this keeps `split` off any ASCII-only code path — confirm.
2917 struct NotAscii(char);
2918 impl CharEq for NotAscii {
2920 fn matches(&mut self, c: char) -> bool {
2921 let NotAscii(cc) = *self;
2924 fn only_ascii(&self) -> bool { false }
2926 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference count obtained with the plain `char` separator, outside the timed closure.
2927 let len = s.split(' ').count();
2929 b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
// Benchmark: splits an ASCII sentence using a named function as the predicate and
// asserts the piece count matches the count obtained with a plain `char`.
2933 fn split_extern_fn(b: &mut Bencher) {
2934 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference count obtained with the plain `char` separator, outside the timed closure.
2935 let len = s.split(' ').count();
// Predicate passed to `split`: true for the space char only.
2936 fn pred(c: char) -> bool { c == ' ' }
2938 b.iter(|| assert_eq!(s.split(pred).count(), len));
// Benchmark: splits an ASCII sentence using a closure as the predicate and asserts
// the piece count matches the count obtained with a plain `char`.
2942 fn split_closure(b: &mut Bencher) {
2943 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference count obtained with the plain `char` separator, outside the timed closure.
2944 let len = s.split(' ').count();
2946 b.iter(|| assert_eq!(s.split(|&: c: char| c == ' ').count(), len));
// Benchmark: splits an ASCII sentence using a `&[char]` slice as the separator set
// and asserts the piece count matches the count obtained with a plain `char`.
2950 fn split_slice(b: &mut Bencher) {
2951 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference count obtained with the plain `char` separator, outside the timed closure.
2952 let len = s.split(' ').count();
// One-element separator set containing only the space char.
2954 let c: &[char] = &[' '];
2955 b.iter(|| assert_eq!(s.split(c).count(), len));
// Benchmark: joins ten copies of a string with `connect` and checks the length of the result.
// NOTE(review): the binding of `sep` is not visible in this view — confirm its value.
2959 fn bench_connect(b: &mut Bencher) {
2960 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2962 let v = vec![s, s, s, s, s, s, s, s, s, s];
// Ten pieces joined by nine separators.
2964 assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
// Benchmark: `contains` on a short haystack; the assertion expects the needle to be found.
// NOTE(review): the binding of `needle` is not visible in this view — confirm its value.
2969 fn bench_contains_short_short(b: &mut Bencher) {
2970 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2974 assert!(haystack.contains(needle));
// Benchmark: `contains` with a short needle against a long multi-paragraph haystack;
// the assertion expects the needle not to be found.
// NOTE(review): the line that opens the `haystack` string literal is not visible in this view — confirm.
2979 fn bench_contains_short_long(b: &mut Bencher) {
2981 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
2982 ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
2983 eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
2984 sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
2985 tempus vel, gravida nec quam.
2987 In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
2988 sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
2989 diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
2990 lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
2991 eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
2992 interdum. Curabitur ut nisi justo.
2994 Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
2995 mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
2996 lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
2997 est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
2998 felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
2999 ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
3000 feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
3001 Aliquam sit amet placerat lorem.
3003 Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
3004 mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
3005 Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
3006 lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
3007 suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
3010 Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
3011 feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
3012 vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
3013 leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
3014 malesuada sollicitudin quam eu fermentum.";
// The assertion below expects this word to be absent from the haystack.
3015 let needle = "english";
3018 assert!(!haystack.contains(needle));
// Benchmark: `contains` where the haystack is a run of 'a' and the needle is a run
// of 'a' ending in 'b', so every position matches a long prefix but never the whole
// needle; the assertion expects no match.
// NOTE(review): going by the name, this targets the slow case of a naive search — confirm.
3023 fn bench_contains_bad_naive(b: &mut Bencher) {
3024 let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
3025 let needle = "aaaaaaaab";
3028 assert!(!haystack.contains(needle));
// Benchmark: `contains` where the needle is identical to the haystack; the
// assertion expects a match.
3033 fn bench_contains_equal(b: &mut Bencher) {
3034 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3035 let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3038 assert!(haystack.contains(needle));