1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 // ignore-lexer-test FIXME #15679
13 //! Unicode string manipulation (`str` type)
17 //! Rust's string type is one of the core primitive types of the language. While
18 //! represented by the name `str`, the name `str` is not actually a valid type in
19 //! Rust. Each string must also be decorated with a pointer. `String` is used
20 //! for an owned string, so there is only one commonly-used `str` type in Rust:
23 //! `&str` is the borrowed string type. This type of string can only be created
24 //! from other strings, unless it is a static string (see below). As the word
25 //! "borrowed" implies, this type of string is owned elsewhere, and this string
26 //! cannot be moved out of.
28 //! As an example, here's some code that uses a string.
32 //! let borrowed_string = "This string is borrowed with the 'static lifetime";
36 //! From the example above, you can guess that Rust's string literals have the
37 //! `'static` lifetime. This is akin to C's concept of a static string.
38 //! More precisely, string literals are immutable views with a 'static lifetime
39 //! (otherwise known as the lifetime of the entire program), and thus have the
40 //! type `&'static str`.
44 //! Rust's string type, `str`, is a sequence of Unicode scalar values encoded as a
45 //! stream of UTF-8 bytes. All [strings](../../reference.html#literals) are
46 //! guaranteed to be validly encoded UTF-8 sequences. Additionally, strings are
47 //! not null-terminated and can thus contain null bytes.
49 //! The actual representation of strings has direct mappings to slices: `&str`
50 //! is the same as `&[u8]`.
52 #![doc(primitive = "str")]
55 use self::RecompositionState::*;
56 use self::DecompositionType::*;
58 use core::borrow::{BorrowFrom, ToOwned};
59 use core::char::CharExt;
60 use core::clone::Clone;
61 use core::iter::AdditiveIterator;
62 use core::iter::{range, Iterator, IteratorExt};
63 use core::ops::{FullRange, Index};
64 use core::option::Option::{self, Some, None};
65 use core::slice::AsSlice;
66 use core::str as core_str;
67 use unicode::str::{UnicodeStr, Utf16Encoder};
69 use ring_buf::RingBuf;
74 use slice::SliceConcatExt;
76 pub use core::str::{FromStr, Utf8Error, Str};
77 pub use core::str::{Lines, LinesAny, MatchIndices, SplitStr, CharRange};
78 pub use core::str::{Split, SplitTerminator};
79 pub use core::str::{SplitN, RSplitN};
80 pub use core::str::{from_utf8, CharEq, Chars, CharIndices, Bytes};
81 pub use core::str::{from_utf8_unchecked, from_c_str};
82 pub use unicode::str::{Words, Graphemes, GraphemeIndices};
85 Section: Creating a string
// Concatenation of string slices. NOTE(review): several original lines
// (loop headers and closing braces) are omitted from this listing; the
// comments below describe only the visible code.
88 impl<S: Str> SliceConcatExt<str, String> for [S] {
89     fn concat(&self) -> String {
90         let s = self.as_slice();
// Pre-size the result so the repeated push_str calls below do not reallocate.
96         // `len` calculation may overflow but push_str will check boundaries
97         let len = s.iter().map(|s| s.as_slice().len()).sum();
98         let mut result = String::with_capacity(len);
101             result.push_str(s.as_slice())
107     fn connect(&self, sep: &str) -> String {
108         let s = self.as_slice();
// Early return: connecting an empty slice yields an empty string.
111             return String::new();
119         // this is wrong without the guarantee that `self` is non-empty
120         // `len` calculation may overflow but push_str will check boundaries
// Capacity: one separator between each adjacent pair, plus all piece lengths.
121         let len = sep.len() * (s.len() - 1)
122             + s.iter().map(|s| s.as_slice().len()).sum();
123         let mut result = String::with_capacity(len);
// `first` suppresses the separator before the first element.
124         let mut first = true;
130                 result.push_str(sep);
132             result.push_str(s.as_slice());
142 // Helper functions used for Unicode normalization
// Stable bubble sort of combining characters by canonical combining class
// (CCC), as required for canonical ordering (UAX #15). Starters (class 0)
// act as barriers and are never moved past.
143 fn canonical_sort(comb: &mut [(char, u8)]) {
144     let len = comb.len();
145     for i in range(0, len) {
146         let mut swapped = false;
147         for j in range(1, len-i) {
148             let class_a = comb[j-1].1;
149             let class_b = comb[j].1;
// Only adjacent non-starter pairs that are out of order get swapped;
// the swap itself falls on a line omitted from this listing.
150             if class_a != 0 && class_b != 0 && class_a > class_b {
// Early exit once a full pass makes no swaps (already sorted).
155         if !swapped { break; }
// Which Unicode decomposition to apply. Variants fall on lines omitted from
// this listing; the impl below calls decompose_canonical and
// decompose_compatible, so presumably Canonical and Compatible — confirm
// against the full source.
160 enum DecompositionType {
165 /// External iterator for a string's decomposition's characters.
166 /// Use with the `std::iter` module.
169 pub struct Decompositions<'a> {
// Canonical (NFD) vs. compatibility (NFKD) decomposition.
170     kind: DecompositionType,
// Decomposed (char, canonical-combining-class) pairs awaiting emission;
// runs of non-starters get sorted by class before being drained.
172     buffer: Vec<(char, u8)>,
// NOTE(review): many lines of this impl are omitted from the listing;
// comments annotate only the visible code.
177 impl<'a> Iterator for Decompositions<'a> {
181     fn next(&mut self) -> Option<char> {
// First drain any already-buffered characters that are safe to emit...
182         match self.buffer.first() {
185                 self.buffer.remove(0);
188             Some(&(c, _)) if self.sorted => {
189                 self.buffer.remove(0);
192             _ => self.sorted = false
// ...then decompose further input characters into the buffer.
196         for ch in self.iter {
197             let buffer = &mut self.buffer;
198             let sorted = &mut self.sorted;
200             let callback = |&mut: d| {
202                     unicode::char::canonical_combining_class(d);
// A starter (class 0) terminates the current reorderable run, so the
// buffered run can be canonically sorted now.
203                 if class == 0 && !*sorted {
204                     canonical_sort(buffer.as_mut_slice());
207                 buffer.push((d, class));
211                     unicode::char::decompose_canonical(ch, callback)
214                     unicode::char::decompose_compatible(ch, callback)
// Input exhausted: sort whatever tail remains, then drain the buffer.
225         canonical_sort(self.buffer.as_mut_slice());
229         if self.buffer.is_empty() {
232         match self.buffer.remove(0) {
242     fn size_hint(&self) -> (uint, Option<uint>) {
// Decomposition can only grow the stream, so the source's lower bound
// carries over; no finite upper bound is claimed.
243         let (lower, _) = self.iter.size_hint();
// State machine driving recomposition. Variants fall on lines omitted from
// this listing; the impl below references Composing, Purging and Finished.
249 enum RecompositionState {
255 /// External iterator for a string's recomposition's characters.
256 /// Use with the `std::iter` module.
259 pub struct Recompositions<'a> {
// Underlying canonically-decomposed stream (NFD/NFKD) being recomposed.
260     iter: Decompositions<'a>,
261     state: RecompositionState,
// Combining characters that could not compose with the current composee
// and must be emitted after it.
262     buffer: RingBuf<char>,
// The current candidate base character for composition, if any.
263     composee: Option<char>,
// Canonical composition over a decomposed stream (UAX #15 step 2).
// NOTE(review): many lines of this impl are omitted from the listing;
// comments annotate only the visible code.
268 impl<'a> Iterator for Recompositions<'a> {
272     fn next(&mut self) -> Option<char> {
276             for ch in self.iter {
277                 let ch_class = unicode::char::canonical_combining_class(ch);
// No pending base character yet: adopt this one as the composee.
278                 if self.composee.is_none() {
282                     self.composee = Some(ch);
285                 let k = self.composee.clone().unwrap();
// `last_ccc` is the combining class of the previous combining mark;
// it determines whether `ch` is blocked from composing with `k`.
287                 match self.last_ccc {
289                         match unicode::char::compose(k, ch) {
// Composition succeeded: the combined character becomes the new composee.
291                                 self.composee = Some(r);
296                                 self.composee = Some(ch);
299                                 self.buffer.push_back(ch);
300                                 self.last_ccc = Some(ch_class);
// A class >= the previous mark's class blocks composition (UAX #15).
305                         if l_class >= ch_class {
306                             // `ch` is blocked from `composee`
308                                 self.composee = Some(ch);
309                                 self.last_ccc = None;
310                                 self.state = Purging;
313                             self.buffer.push_back(ch);
314                             self.last_ccc = Some(ch_class);
317                         match unicode::char::compose(k, ch) {
319                                 self.composee = Some(r);
323                                 self.buffer.push_back(ch);
324                                 self.last_ccc = Some(ch_class);
// Input exhausted: emit the pending composee, then drain the buffer.
330                 self.state = Finished;
331                 if self.composee.is_some() {
332                     return self.composee.take();
// Purging: flush blocked marks; when empty, resume composing.
336                 match self.buffer.pop_front() {
337                     None => self.state = Composing,
// Finished: drain remaining buffered marks, then the final composee.
342                 match self.buffer.pop_front() {
343                     None => return self.composee.take(),
352 /// External iterator for a string's UTF16 codeunits.
353 /// Use with the `std::iter` module.
356 pub struct Utf16Units<'a> {
// Thin wrapper: all work is delegated to the generic UTF-16 encoder
// driven by the string's char iterator.
357     encoder: Utf16Encoder<Chars<'a>>
// Pure delegation: both methods forward directly to the wrapped encoder.
361 impl<'a> Iterator for Utf16Units<'a> {
365     fn next(&mut self) -> Option<u16> { self.encoder.next() }
368     fn size_hint(&self) -> (uint, Option<uint>) { self.encoder.size_hint() }
375 // Return the initial codepoint accumulator for the first byte.
376 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
377 // for width 3, and 3 bits for width 4
378 macro_rules! utf8_first_byte {
// Masking with (0x7F >> width) keeps exactly the payload bits of a UTF-8
// leading byte for the given sequence width (5, 4 or 3 bits).
379     ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
382 // return the value of $ch updated with continuation byte $byte
383 macro_rules! utf8_acc_cont_byte {
// Each continuation byte contributes its low 6 bits (mask 63 == 0x3F),
// shifted in below the accumulated value.
384     ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
387 #[unstable = "trait is unstable"]
// Lets a String be borrowed as a &str (e.g. for map lookups keyed by str).
388 impl BorrowFrom<String> for str {
// Borrowing is just a full-range slice of the owned string's contents.
389     fn borrow_from(owned: &String) -> &str { &owned[] }
392 #[unstable = "trait is unstable"]
// Produces an owned String from a borrowed &str by copying the bytes.
393 impl ToOwned<String> for str {
394     fn to_owned(&self) -> String {
// Skipping UTF-8 validation is sound: the bytes come from an existing
// &str, which is guaranteed valid UTF-8. (The `unsafe` wrapper line is
// omitted from this listing.)
396             String::from_utf8_unchecked(self.as_bytes().to_owned())
406 Section: Trait implementations
409 /// Any string that can be represented as a slice.
// Extension trait providing all str methods; defaults mostly delegate to
// core_str::StrExt / unicode::str::UnicodeStr on `&self[]` (a full-range
// slice reborrow). NOTE(review): blank doc lines, `# Examples` headers and
// closing braces fall on lines omitted from this listing.
411 pub trait StrExt: Index<FullRange, Output = str> {
412 /// Escapes each char in `s` with `char::escape_default`.
413 #[unstable = "return type may change to be an iterator"]
414 fn escape_default(&self) -> String {
415 self.chars().flat_map(|c| c.escape_default()).collect()
418 /// Escapes each char in `s` with `char::escape_unicode`.
419 #[unstable = "return type may change to be an iterator"]
420 fn escape_unicode(&self) -> String {
421 self.chars().flat_map(|c| c.escape_unicode()).collect()
424 /// Replaces all occurrences of one string with another.
428 /// * `from` - The string to replace
429 /// * `to` - The replacement string
433 /// The original string with all occurrences of `from` replaced with `to`.
438 /// let s = "Do you know the muffin man,
439 /// The muffin man, the muffin man, ...".to_string();
441 /// assert_eq!(s.replace("muffin man", "little lamb"),
442 /// "Do you know the little lamb,
443 /// The little lamb, the little lamb, ...".to_string());
445 /// // not found, so no change.
446 /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
449 fn replace(&self, from: &str, to: &str) -> String {
450 let mut result = String::new();
451 let mut last_end = 0;
// Copy the unmatched gap before each match; slice_unchecked is safe here
// because match_indices only yields char-boundary indices.
452 for (start, end) in self.match_indices(from) {
453 result.push_str(unsafe { self.slice_unchecked(last_end, start) });
// Copy the tail after the final match.
457 result.push_str(unsafe { self.slice_unchecked(last_end, self.len()) });
461 /// Returns an iterator over the string in Unicode Normalization Form D
462 /// (canonical decomposition).
464 #[unstable = "this functionality may be moved to libunicode"]
465 fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
467 iter: self[].chars(),
474 /// Returns an iterator over the string in Unicode Normalization Form KD
475 /// (compatibility decomposition).
477 #[unstable = "this functionality may be moved to libunicode"]
478 fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
480 iter: self[].chars(),
487 /// An Iterator over the string in Unicode Normalization Form C
488 /// (canonical decomposition followed by canonical composition).
490 #[unstable = "this functionality may be moved to libunicode"]
491 fn nfc_chars<'a>(&'a self) -> Recompositions<'a> {
493 iter: self.nfd_chars(),
495 buffer: RingBuf::new(),
501 /// An Iterator over the string in Unicode Normalization Form KC
502 /// (compatibility decomposition followed by canonical composition).
504 #[unstable = "this functionality may be moved to libunicode"]
505 fn nfkc_chars<'a>(&'a self) -> Recompositions<'a> {
507 iter: self.nfkd_chars(),
509 buffer: RingBuf::new(),
515 /// Returns true if a string contains a string pattern.
519 /// - pat - The string pattern to look for
524 /// assert!("bananas".contains("nana"));
527 fn contains(&self, pat: &str) -> bool {
528 core_str::StrExt::contains(&self[], pat)
531 /// Returns true if a string contains a char pattern.
535 /// - pat - The char pattern to look for
540 /// assert!("hello".contains_char('e'));
542 #[unstable = "might get removed in favour of a more generic contains()"]
543 fn contains_char<P: CharEq>(&self, pat: P) -> bool {
544 core_str::StrExt::contains_char(&self[], pat)
547 /// An iterator over the characters of `self`. Note, this iterates
548 /// over Unicode code-points, not Unicode graphemes.
553 /// let v: Vec<char> = "abc åäö".chars().collect();
554 /// assert_eq!(v, vec!['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
557 fn chars(&self) -> Chars {
558 core_str::StrExt::chars(&self[])
561 /// An iterator over the bytes of `self`
566 /// let v: Vec<u8> = "bors".bytes().collect();
567 /// assert_eq!(v, b"bors".to_vec());
570 fn bytes(&self) -> Bytes {
571 core_str::StrExt::bytes(&self[])
574 /// An iterator over the characters of `self` and their byte offsets.
576 fn char_indices(&self) -> CharIndices {
577 core_str::StrExt::char_indices(&self[])
580 /// An iterator over substrings of `self`, separated by characters
581 /// matched by the pattern `pat`.
586 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect();
587 /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
589 /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).collect();
590 /// assert_eq!(v, vec!["abc", "def", "ghi"]);
592 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
593 /// assert_eq!(v, vec!["lion", "", "tiger", "leopard"]);
595 /// let v: Vec<&str> = "".split('X').collect();
596 /// assert_eq!(v, vec![""]);
599 fn split<P: CharEq>(&self, pat: P) -> Split<P> {
600 core_str::StrExt::split(&self[], pat)
603 /// An iterator over substrings of `self`, separated by characters
604 /// matched by the pattern `pat`, restricted to splitting at most `count`
610 /// let v: Vec<&str> = "Mary had a little lambda".splitn(2, ' ').collect();
611 /// assert_eq!(v, vec!["Mary", "had", "a little lambda"]);
613 /// let v: Vec<&str> = "abc1def2ghi".splitn(1, |&: c: char| c.is_numeric()).collect();
614 /// assert_eq!(v, vec!["abc", "def2ghi"]);
616 /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(2, 'X').collect();
617 /// assert_eq!(v, vec!["lion", "", "tigerXleopard"]);
619 /// let v: Vec<&str> = "abcXdef".splitn(0, 'X').collect();
620 /// assert_eq!(v, vec!["abcXdef"]);
622 /// let v: Vec<&str> = "".splitn(1, 'X').collect();
623 /// assert_eq!(v, vec![""]);
626 fn splitn<P: CharEq>(&self, count: uint, pat: P) -> SplitN<P> {
627 core_str::StrExt::splitn(&self[], count, pat)
630 /// An iterator over substrings of `self`, separated by characters
631 /// matched by the pattern `pat`.
633 /// Equivalent to `split`, except that the trailing substring
634 /// is skipped if empty (terminator semantics).
639 /// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
640 /// assert_eq!(v, vec!["A", "B"]);
642 /// let v: Vec<&str> = "A..B..".split_terminator('.').collect();
643 /// assert_eq!(v, vec!["A", "", "B", ""]);
645 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').rev().collect();
646 /// assert_eq!(v, vec!["lamb", "little", "a", "had", "Mary"]);
648 /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).rev().collect();
649 /// assert_eq!(v, vec!["ghi", "def", "abc"]);
651 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').rev().collect();
652 /// assert_eq!(v, vec!["leopard", "tiger", "", "lion"]);
654 #[unstable = "might get removed"]
655 fn split_terminator<P: CharEq>(&self, pat: P) -> SplitTerminator<P> {
656 core_str::StrExt::split_terminator(&self[], pat)
659 /// An iterator over substrings of `self`, separated by characters
660 /// matched by the pattern `pat`, starting from the end of the string.
661 /// Restricted to splitting at most `count` times.
666 /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(2, ' ').collect();
667 /// assert_eq!(v, vec!["lamb", "little", "Mary had a"]);
669 /// let v: Vec<&str> = "abc1def2ghi".rsplitn(1, |&: c: char| c.is_numeric()).collect();
670 /// assert_eq!(v, vec!["ghi", "abc1def"]);
672 /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(2, 'X').collect();
673 /// assert_eq!(v, vec!["leopard", "tiger", "lionX"]);
676 fn rsplitn<P: CharEq>(&self, count: uint, pat: P) -> RSplitN<P> {
677 core_str::StrExt::rsplitn(&self[], count, pat)
680 /// An iterator over the start and end indices of the disjoint
681 /// matches of the pattern `pat` within `self`.
683 /// That is, each returned value `(start, end)` satisfies
684 /// `self.slice(start, end) == sep`. For matches of `sep` within
685 /// `self` that overlap, only the indices corresponding to the
686 /// first match are returned.
691 /// let v: Vec<(uint, uint)> = "abcXXXabcYYYabc".match_indices("abc").collect();
692 /// assert_eq!(v, vec![(0,3), (6,9), (12,15)]);
694 /// let v: Vec<(uint, uint)> = "1abcabc2".match_indices("abc").collect();
695 /// assert_eq!(v, vec![(1,4), (4,7)]);
697 /// let v: Vec<(uint, uint)> = "ababa".match_indices("aba").collect();
698 /// assert_eq!(v, vec![(0, 3)]); // only the first `aba`
700 #[unstable = "might have its iterator type changed"]
701 fn match_indices<'a>(&'a self, pat: &'a str) -> MatchIndices<'a> {
702 core_str::StrExt::match_indices(&self[], pat)
705 /// An iterator over the substrings of `self` separated by the pattern `sep`.
710 /// let v: Vec<&str> = "abcXXXabcYYYabc".split_str("abc").collect();
711 /// assert_eq!(v, vec!["", "XXX", "YYY", ""]);
713 /// let v: Vec<&str> = "1abcabc2".split_str("abc").collect();
714 /// assert_eq!(v, vec!["1", "", "2"]);
716 #[unstable = "might get removed in the future in favor of a more generic split()"]
717 fn split_str<'a>(&'a self, pat: &'a str) -> SplitStr<'a> {
718 core_str::StrExt::split_str(&self[], pat)
721 /// An iterator over the lines of a string (subsequences separated
722 /// by `\n`). This does not include the empty string after a
728 /// let four_lines = "foo\nbar\n\nbaz\n";
729 /// let v: Vec<&str> = four_lines.lines().collect();
730 /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
733 fn lines(&self) -> Lines {
734 core_str::StrExt::lines(&self[])
737 /// An iterator over the lines of a string, separated by either
738 /// `\n` or `\r\n`. As with `.lines()`, this does not include an
739 /// empty trailing line.
744 /// let four_lines = "foo\r\nbar\n\r\nbaz\n";
745 /// let v: Vec<&str> = four_lines.lines_any().collect();
746 /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
749 fn lines_any(&self) -> LinesAny {
750 core_str::StrExt::lines_any(&self[])
753 /// Deprecated: use `s[a .. b]` instead.
754 #[deprecated = "use slice notation [a..b] instead"]
755 fn slice(&self, begin: uint, end: uint) -> &str;
757 /// Deprecated: use `s[a..]` instead.
758 #[deprecated = "use slice notation [a..] instead"]
759 fn slice_from(&self, begin: uint) -> &str;
761 /// Deprecated: use `s[..a]` instead.
762 #[deprecated = "use slice notation [..a] instead"]
763 fn slice_to(&self, end: uint) -> &str;
765 /// Returns a slice of the string from the character range
766 /// [`begin`..`end`).
768 /// That is, start at the `begin`-th code point of the string and
769 /// continue to the `end`-th code point. This does not detect or
770 /// handle edge cases such as leaving a combining character as the
771 /// first code point of the string.
773 /// Due to the design of UTF-8, this operation is `O(end)`.
774 /// See `slice`, `slice_to` and `slice_from` for `O(1)`
775 /// variants that use byte indices rather than code point
778 /// Panics if `begin` > `end` or if either `begin` or `end` are
779 /// beyond the last character of the string.
784 /// let s = "Löwe 老虎 Léopard";
785 /// assert_eq!(s.slice_chars(0, 4), "Löwe");
786 /// assert_eq!(s.slice_chars(5, 7), "老虎");
788 #[unstable = "may have yet to prove its worth"]
789 fn slice_chars(&self, begin: uint, end: uint) -> &str {
790 core_str::StrExt::slice_chars(&self[], begin, end)
793 /// Takes a bytewise (not UTF-8) slice from a string.
795 /// Returns the substring from [`begin`..`end`).
797 /// Caller must check both UTF-8 character boundaries and the boundaries of
798 /// the entire slice as well.
800 unsafe fn slice_unchecked(&self, begin: uint, end: uint) -> &str {
801 core_str::StrExt::slice_unchecked(&self[], begin, end)
804 /// Returns true if the pattern `pat` is a prefix of the string.
809 /// assert!("banana".starts_with("ba"));
812 fn starts_with(&self, pat: &str) -> bool {
813 core_str::StrExt::starts_with(&self[], pat)
816 /// Returns true if the pattern `pat` is a suffix of the string.
821 /// assert!("banana".ends_with("nana"));
824 fn ends_with(&self, pat: &str) -> bool {
825 core_str::StrExt::ends_with(&self[], pat)
828 /// Returns a string with all pre- and suffixes that match
829 /// the pattern `pat` repeatedly removed.
833 /// * pat - a string pattern
838 /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
839 /// let x: &[_] = &['1', '2'];
840 /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar");
841 /// assert_eq!("123foo1bar123".trim_matches(|&: c: char| c.is_numeric()), "foo1bar");
844 fn trim_matches<P: CharEq>(&self, pat: P) -> &str {
845 core_str::StrExt::trim_matches(&self[], pat)
848 /// Returns a string with all prefixes that match
849 /// the pattern `pat` repeatedly removed.
853 /// * pat - a string pattern
858 /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
859 /// let x: &[_] = &['1', '2'];
860 /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12");
861 /// assert_eq!("123foo1bar123".trim_left_matches(|&: c: char| c.is_numeric()), "foo1bar123");
864 fn trim_left_matches<P: CharEq>(&self, pat: P) -> &str {
865 core_str::StrExt::trim_left_matches(&self[], pat)
868 /// Returns a string with all suffixes that match
869 /// the pattern `pat` repeatedly removed.
873 /// * pat - a string pattern
878 /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
879 /// let x: &[_] = &['1', '2'];
880 /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar");
881 /// assert_eq!("123foo1bar123".trim_right_matches(|&: c: char| c.is_numeric()), "123foo1bar");
884 fn trim_right_matches<P: CharEq>(&self, pat: P) -> &str {
885 core_str::StrExt::trim_right_matches(&self[], pat)
888 /// Check that `index`-th byte lies at the start and/or end of a
889 /// UTF-8 code point sequence.
891 /// The start and end of the string (when `index == self.len()`)
892 /// are considered to be boundaries.
894 /// Panics if `index` is greater than `self.len()`.
899 /// let s = "Löwe 老虎 Léopard";
900 /// assert!(s.is_char_boundary(0));
902 /// assert!(s.is_char_boundary(6));
903 /// assert!(s.is_char_boundary(s.len()));
905 /// // second byte of `ö`
906 /// assert!(!s.is_char_boundary(2));
908 /// // third byte of `老`
909 /// assert!(!s.is_char_boundary(8));
911 #[unstable = "naming is uncertain with container conventions"]
912 fn is_char_boundary(&self, index: uint) -> bool {
913 core_str::StrExt::is_char_boundary(&self[], index)
916 /// Pluck a character out of a string and return the index of the next
919 /// This function can be used to iterate over the Unicode characters of a
924 /// This example manually iterates through the characters of a
925 /// string; this should normally be done by `.chars()` or
929 /// use std::str::CharRange;
931 /// let s = "中华Việt Nam";
933 /// while i < s.len() {
934 /// let CharRange {ch, next} = s.char_range_at(i);
935 /// println!("{}: {}", i, ch);
958 /// * i - The byte offset of the char to extract
962 /// A record {ch: char, next: uint} containing the char value and the byte
963 /// index of the next Unicode character.
967 /// If `i` is greater than or equal to the length of the string.
968 /// If `i` is not the index of the beginning of a valid UTF-8 character.
969 #[unstable = "naming is uncertain with container conventions"]
970 fn char_range_at(&self, start: uint) -> CharRange {
971 core_str::StrExt::char_range_at(&self[], start)
974 /// Given a byte position and a str, return the previous char and its position.
976 /// This function can be used to iterate over a Unicode string in reverse.
978 /// Returns 0 for next index if called on start index 0.
982 /// If `i` is greater than the length of the string.
983 /// If `i` is not an index following a valid UTF-8 character.
984 #[unstable = "naming is uncertain with container conventions"]
985 fn char_range_at_reverse(&self, start: uint) -> CharRange {
986 core_str::StrExt::char_range_at_reverse(&self[], start)
989 /// Plucks the character starting at the `i`th byte of a string.
995 /// assert_eq!(s.char_at(1), 'b');
996 /// assert_eq!(s.char_at(2), 'π');
997 /// assert_eq!(s.char_at(4), 'c');
1002 /// If `i` is greater than or equal to the length of the string.
1003 /// If `i` is not the index of the beginning of a valid UTF-8 character.
1004 #[unstable = "naming is uncertain with container conventions"]
1005 fn char_at(&self, i: uint) -> char {
1006 core_str::StrExt::char_at(&self[], i)
1009 /// Plucks the character ending at the `i`th byte of a string.
1013 /// If `i` is greater than the length of the string.
1014 /// If `i` is not an index following a valid UTF-8 character.
1015 #[unstable = "naming is uncertain with container conventions"]
1016 fn char_at_reverse(&self, i: uint) -> char {
1017 core_str::StrExt::char_at_reverse(&self[], i)
1020 /// Work with the byte buffer of a string as a byte slice.
1025 /// assert_eq!("bors".as_bytes(), b"bors");
1028 fn as_bytes(&self) -> &[u8] {
1029 core_str::StrExt::as_bytes(&self[])
1032 /// Returns the byte index of the first character of `self` that
1033 /// matches the pattern `pat`.
1037 /// `Some` containing the byte index of the first matching character
1038 /// or `None` if there is no match
1043 /// let s = "Löwe 老虎 Léopard";
1045 /// assert_eq!(s.find('L'), Some(0));
1046 /// assert_eq!(s.find('é'), Some(14));
1048 /// // the first space
1049 /// assert_eq!(s.find(|&: c: char| c.is_whitespace()), Some(5));
1051 /// // neither are found
1052 /// let x: &[_] = &['1', '2'];
1053 /// assert_eq!(s.find(x), None);
1056 fn find<P: CharEq>(&self, pat: P) -> Option<uint> {
1057 core_str::StrExt::find(&self[], pat)
1060 /// Returns the byte index of the last character of `self` that
1061 /// matches the pattern `pat`.
1065 /// `Some` containing the byte index of the last matching character
1066 /// or `None` if there is no match.
1071 /// let s = "Löwe 老虎 Léopard";
1073 /// assert_eq!(s.rfind('L'), Some(13));
1074 /// assert_eq!(s.rfind('é'), Some(14));
1076 /// // the second space
1077 /// assert_eq!(s.rfind(|&: c: char| c.is_whitespace()), Some(12));
1079 /// // searches for an occurrence of either `1` or `2`, but neither are found
1080 /// let x: &[_] = &['1', '2'];
1081 /// assert_eq!(s.rfind(x), None);
1084 fn rfind<P: CharEq>(&self, pat: P) -> Option<uint> {
1085 core_str::StrExt::rfind(&self[], pat)
1088 /// Returns the byte index of the first matching substring
1092 /// * `needle` - The string to search for
1096 /// `Some` containing the byte index of the first matching substring
1097 /// or `None` if there is no match.
1102 /// let s = "Löwe 老虎 Léopard";
1104 /// assert_eq!(s.find_str("老虎 L"), Some(6));
1105 /// assert_eq!(s.find_str("muffin man"), None);
1107 #[unstable = "might get removed in favor of a more generic find in the future"]
1108 fn find_str(&self, needle: &str) -> Option<uint> {
1109 core_str::StrExt::find_str(&self[], needle)
1112 /// Retrieves the first character from a string slice and returns
1113 /// it. This does not allocate a new string; instead, it returns a
1114 /// slice that points one character beyond the character that was
1115 /// shifted. If the string does not contain any characters,
1116 /// None is returned instead.
1121 /// let s = "Löwe 老虎 Léopard";
1122 /// let (c, s1) = s.slice_shift_char().unwrap();
1123 /// assert_eq!(c, 'L');
1124 /// assert_eq!(s1, "öwe 老虎 Léopard");
1126 /// let (c, s2) = s1.slice_shift_char().unwrap();
1127 /// assert_eq!(c, 'ö');
1128 /// assert_eq!(s2, "we 老虎 Léopard");
1130 #[unstable = "awaiting conventions about shifting and slices"]
1131 fn slice_shift_char(&self) -> Option<(char, &str)> {
1132 core_str::StrExt::slice_shift_char(&self[])
1135 /// Returns the byte offset of an inner slice relative to an enclosing outer slice.
1137 /// Panics if `inner` is not a direct slice contained within self.
1142 /// let string = "a\nb\nc";
1143 /// let lines: Vec<&str> = string.lines().collect();
1145 /// assert!(string.subslice_offset(lines[0]) == 0); // &"a"
1146 /// assert!(string.subslice_offset(lines[1]) == 2); // &"b"
1147 /// assert!(string.subslice_offset(lines[2]) == 4); // &"c"
1149 #[unstable = "awaiting convention about comparability of arbitrary slices"]
1150 fn subslice_offset(&self, inner: &str) -> uint {
1151 core_str::StrExt::subslice_offset(&self[], inner)
1154 /// Return an unsafe pointer to the strings buffer.
1156 /// The caller must ensure that the string outlives this pointer,
1157 /// and that it is not reallocated (e.g. by pushing to the
1161 fn as_ptr(&self) -> *const u8 {
1162 core_str::StrExt::as_ptr(&self[])
1165 /// Return an iterator of `u16` over the string encoded as UTF-16.
1166 #[unstable = "this functionality may only be provided by libunicode"]
1167 fn utf16_units(&self) -> Utf16Units {
1168 Utf16Units { encoder: Utf16Encoder::new(self[].chars()) }
1171 /// Return the number of bytes in this string
1176 /// assert_eq!("foo".len(), 3);
1177 /// assert_eq!("ƒoo".len(), 4);
1181 fn len(&self) -> uint {
1182 core_str::StrExt::len(&self[])
1185 /// Returns true if this slice contains no bytes
1190 /// assert!("".is_empty());
1194 fn is_empty(&self) -> bool {
1195 core_str::StrExt::is_empty(&self[])
1198 /// Parse this string into the specified type.
1203 /// assert_eq!("4".parse::<u32>(), Some(4));
1204 /// assert_eq!("j".parse::<u32>(), None);
1207 #[unstable = "this method was just created"]
1208 fn parse<F: FromStr>(&self) -> Option<F> {
1209 core_str::StrExt::parse(&self[])
1212 /// Returns an iterator over the
1213 /// [grapheme clusters](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
1216 /// If `is_extended` is true, the iterator is over the *extended grapheme clusters*;
1217 /// otherwise, the iterator is over the *legacy grapheme clusters*.
1218 /// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
1219 /// recommends extended grapheme cluster boundaries for general processing.
1224 /// let gr1 = "a\u{310}e\u{301}o\u{308}\u{332}".graphemes(true).collect::<Vec<&str>>();
1225 /// let b: &[_] = &["a\u{310}", "e\u{301}", "o\u{308}\u{332}"];
1226 /// assert_eq!(gr1.as_slice(), b);
1227 /// let gr2 = "a\r\nb🇷🇺🇸🇹".graphemes(true).collect::<Vec<&str>>();
1228 /// let b: &[_] = &["a", "\r\n", "b", "🇷🇺🇸🇹"];
1229 /// assert_eq!(gr2.as_slice(), b);
1231 #[unstable = "this functionality may only be provided by libunicode"]
1232 fn graphemes(&self, is_extended: bool) -> Graphemes {
1233 UnicodeStr::graphemes(&self[], is_extended)
1236 /// Returns an iterator over the grapheme clusters of self and their byte offsets.
1237 /// See `graphemes()` method for more information.
1242 /// let gr_inds = "a̐éö̲\r\n".grapheme_indices(true).collect::<Vec<(uint, &str)>>();
1243 /// let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
1244 /// assert_eq!(gr_inds.as_slice(), b);
1246 #[unstable = "this functionality may only be provided by libunicode"]
1247 fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices {
1248 UnicodeStr::grapheme_indices(&self[], is_extended)
1251 /// An iterator over the words of a string (subsequences separated
1252 /// by any sequence of whitespace). Sequences of whitespace are
1253 /// collapsed, so empty "words" are not included.
1258 /// let some_words = " Mary had\ta little \n\t lamb";
1259 /// let v: Vec<&str> = some_words.words().collect();
1260 /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
1263 fn words(&self) -> Words {
1264 UnicodeStr::words(&self[])
1267 /// Returns a string's displayed width in columns, treating control
1268 /// characters as zero-width.
1270 /// `is_cjk` determines behavior for characters in the Ambiguous category:
1271 /// if `is_cjk` is `true`, these are 2 columns wide; otherwise, they are 1.
1272 /// In CJK locales, `is_cjk` should be `true`, else it should be `false`.
1273 /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
1274 /// recommends that these characters be treated as 1 column (i.e.,
1275 /// `is_cjk` = `false`) if the locale is unknown.
1276 #[unstable = "this functionality may only be provided by libunicode"]
1277 fn width(&self, is_cjk: bool) -> uint {
1278 UnicodeStr::width(&self[], is_cjk)
1281 /// Returns a string with leading and trailing whitespace removed.
1283 fn trim(&self) -> &str {
1284 UnicodeStr::trim(&self[])
1287 /// Returns a string with leading whitespace removed.
1289 fn trim_left(&self) -> &str {
1290 UnicodeStr::trim_left(&self[])
1293 /// Returns a string with trailing whitespace removed.
1295 fn trim_right(&self) -> &str {
1296 UnicodeStr::trim_right(&self[])
// Concrete impl for str providing the three deprecated slicing methods.
// Method bodies (presumably forwarding to slice notation, per the
// #[deprecated] notes on the trait) fall on lines omitted from this
// listing — confirm against the full source.
1301 impl StrExt for str {
1302     fn slice(&self, begin: uint, end: uint) -> &str {
1306     fn slice_from(&self, begin: uint) -> &str {
1310     fn slice_to(&self, end: uint) -> &str {
1319 use core::iter::AdditiveIterator;
1320 use super::from_utf8;
1321 use super::Utf8Error;
// Test-module fragments (the `#[test] fn ...` headers are among the lines
// this listing dropped): string ordering, byte length vs. char count for
// multi-byte UTF-8, and column width.
1326 assert!("" <= "foo");
1327 assert!("foo" <= "foo");
1328 assert!("foo" != "bar");
// `.len()` is the UTF-8 byte length, not the character count.
1333 assert_eq!("".len(), 0u);
1334 assert_eq!("hello world".len(), 11u);
1335 assert_eq!("\x63".len(), 1u);
1336 assert_eq!("\u{a2}".len(), 2u);
1337 assert_eq!("\u{3c0}".len(), 2u);
1338 assert_eq!("\u{2620}".len(), 3u);
1339 assert_eq!("\u{1d11e}".len(), 4u);
// `.chars().count()` counts scalar values, so each escape above is 1.
1341 assert_eq!("".chars().count(), 0u);
1342 assert_eq!("hello world".chars().count(), 11u);
1343 assert_eq!("\x63".chars().count(), 1u);
1344 assert_eq!("\u{a2}".chars().count(), 1u);
1345 assert_eq!("\u{3c0}".chars().count(), 1u);
1346 assert_eq!("\u{2620}".chars().count(), 1u);
1347 assert_eq!("\u{1d11e}".chars().count(), 1u);
1348 assert_eq!("ประเทศไทย中华Việt Nam".chars().count(), 19u);
// NOTE(review): expecting width 10 for "hello" only makes sense if the
// original test string was fullwidth "ｈｅｌｌｏ" (2 columns per char); ASCII
// "hello" is 5 columns. The fullwidth characters were likely normalized to
// ASCII when this listing was produced — TODO confirm against the original.
1350 assert_eq!("hello".width(false), 10u);
1351 assert_eq!("hello".width(true), 10u);
// Control characters (NUL here) are zero-width per the `width` contract.
1352 assert_eq!("\0\0\0\0\0".width(false), 0u);
1353 assert_eq!("\0\0\0\0\0".width(true), 0u);
1354 assert_eq!("".width(false), 0u);
1355 assert_eq!("".width(true), 0u);
// Subscript digits are in the Ambiguous category: 1 column normally,
// 2 columns when `is_cjk` is true.
1356 assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(false), 4u);
1357 assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(true), 8u);
// `find`/`rfind` with both char patterns and char-predicate closures
// (`|&: ...|` is the pre-1.0 by-ref closure syntax). Returned indices are
// byte offsets — hence Some(30) for '华' inside multi-byte Thai text.
1362 assert_eq!("hello".find('l'), Some(2u));
1363 assert_eq!("hello".find(|&: c:char| c == 'o'), Some(4u));
1364 assert!("hello".find('x').is_none());
1365 assert!("hello".find(|&: c:char| c == 'x').is_none());
1366 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
1367 assert_eq!("ประเทศไทย中华Việt Nam".find(|&: c: char| c == '华'), Some(30u));
// rfind searches from the end but still reports the forward byte offset.
1372 assert_eq!("hello".rfind('l'), Some(3u));
1373 assert_eq!("hello".rfind(|&: c:char| c == 'o'), Some(4u));
1374 assert!("hello".rfind('x').is_none());
1375 assert!("hello".rfind(|&: c:char| c == 'x').is_none());
1376 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
1377 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|&: c: char| c == '华'), Some(30u));
// Round-trip: collecting a string's chars reproduces the string.
1382 let empty = String::from_str("");
1383 let s: String = empty.chars().collect();
1384 assert_eq!(empty, s);
1385 let data = String::from_str("ประเทศไทย中");
1386 let s: String = data.chars().collect();
1387 assert_eq!(data, s);
// `into_bytes` consumes the String and yields its UTF-8 bytes.
1391 fn test_into_bytes() {
1392 let data = String::from_str("asdf");
1393 let buf = data.into_bytes();
1394 assert_eq!(b"asdf", buf);
// Substring search. Indices are byte offsets relative to the (sub)slice
// searched, hence the `Some(x - base)` arithmetic for slices not starting
// at 0. Empty needle matches at offset 0 of any haystack.
1398 fn test_find_str() {
1400 assert_eq!("".find_str(""), Some(0u));
1401 assert!("banana".find_str("apple pie").is_none());
1403 let data = "abcabc";
1404 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
1405 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
1406 assert!(data.slice(2u, 4u).find_str("ab").is_none());
// `data` becomes the 43-byte Thai/CJK/Vietnamese string doubled (86 bytes).
1408 let string = "ประเทศไทย中华Việt Nam";
1409 let mut data = String::from_str(string);
1410 data.push_str(string);
// Needle spanning a boundary that never occurs contiguously: no match.
1411 assert!(data.find_str("ไท华").is_none());
1412 assert_eq!(data.slice(0u, 43u).find_str(""), Some(0u));
1413 assert_eq!(data.slice(6u, 43u).find_str(""), Some(6u - 6u));
// First copy: absolute offsets within the first 43 bytes.
1415 assert_eq!(data.slice(0u, 43u).find_str("ประ"), Some( 0u));
1416 assert_eq!(data.slice(0u, 43u).find_str("ทศไ"), Some(12u));
1417 assert_eq!(data.slice(0u, 43u).find_str("ย中"), Some(24u));
1418 assert_eq!(data.slice(0u, 43u).find_str("iệt"), Some(34u));
1419 assert_eq!(data.slice(0u, 43u).find_str("Nam"), Some(40u));
// Second copy: same offsets shifted by 43, then rebased to the subslice.
1421 assert_eq!(data.slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
1422 assert_eq!(data.slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
1423 assert_eq!(data.slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
1424 assert_eq!(data.slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
1425 assert_eq!(data.slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
// `slice_chars` slices by CHARACTER index (not byte offset), unlike `slice`.
1429 fn test_slice_chars() {
1430 fn t(a: &str, b: &str, start: uint) {
1431 assert_eq!(a.slice_chars(start, start + b.chars().count()), b);
1434 t("hello", "llo", 2);
1435 t("hello", "el", 1);
1438 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
// Shorthand for building owned Strings in the concat/connect tests below.
1441 fn s(x: &str) -> String { x.to_string() }
// Macro so the same expectations run against Vec<String>, Vec<&str>,
// slices, etc. — anything with a `.concat()` method.
1443 macro_rules! test_concat {
1444 ($expected: expr, $string: expr) => {
1446 let s: String = $string.concat();
1447 assert_eq!($expected, s);
1453 fn test_concat_for_different_types() {
1454 test_concat!("ab", vec![s("a"), s("b")]);
1455 test_concat!("ab", vec!["a", "b"]);
1456 test_concat!("ab", vec!["a", "b"].as_slice());
1457 test_concat!("ab", vec![s("a"), s("b")]);
// Edge cases: empty container, single element, elements of varying length
// including the empty string.
1461 fn test_concat_for_different_lengths() {
1462 let empty: &[&str] = &[];
1463 test_concat!("", empty);
1464 test_concat!("a", ["a"]);
1465 test_concat!("ab", ["a", "b"]);
1466 test_concat!("abc", ["", "a", "bc"]);
// Like test_concat!, but for `.connect(delim)` (the pre-1.0 name for what
// is now `join`): elements interleaved with a delimiter.
1469 macro_rules! test_connect {
1470 ($expected: expr, $string: expr, $delim: expr) => {
1472 let s = $string.connect($delim);
1473 assert_eq!($expected, s);
1479 fn test_connect_for_different_types() {
1480 test_connect!("a-b", ["a", "b"], "-");
1481 let hyphen = "-".to_string();
1482 test_connect!("a-b", [s("a"), s("b")], hyphen.as_slice());
1483 test_connect!("a-b", vec!["a", "b"], hyphen.as_slice());
1484 test_connect!("a-b", vec!["a", "b"].as_slice(), "-");
1485 test_connect!("a-b", vec![s("a"), s("b")], "-");
// Empty / single / empty-element cases; note the leading "-" when the first
// element is "".
1489 fn test_connect_for_different_lengths() {
1490 let empty: &[&str] = &[];
1491 test_connect!("", empty, "-");
1492 test_connect!("a", ["a"], "-");
1493 test_connect!("a-b", ["a", "b"], "-");
1494 test_connect!("-a-bc", ["", "a", "bc"], "-");
// `slice_unchecked` skips boundary/bounds checks, so callers guarantee
// char-boundary-aligned indices; safe here because inputs are ASCII.
1498 fn test_unsafe_slice() {
1499 assert_eq!("ab", unsafe {"abc".slice_unchecked(0, 2)});
1500 assert_eq!("bc", unsafe {"abc".slice_unchecked(1, 3)});
1501 assert_eq!("", unsafe {"abc".slice_unchecked(1, 1)});
// Builds a 1,000,000-char string (loop lines absent from this listing;
// presumably 100000 iterations of a 10-char push — TODO confirm).
1502 fn a_million_letter_a() -> String {
1504 let mut rs = String::new();
1506 rs.push_str("aaaaaaaaaa");
1511 fn half_a_million_letter_a() -> String {
1513 let mut rs = String::new();
1515 rs.push_str("aaaaa");
// Slicing the first half of the million-'a' string equals the
// half-million-'a' string (slice args truncated in this listing).
1520 let letters = a_million_letter_a();
1521 assert!(half_a_million_letter_a() ==
1522 unsafe {String::from_str(letters.slice_unchecked(
// Prefix/suffix checks, including the empty-string edge cases ("" is a
// prefix and suffix of everything) and a multi-byte char ('ö') straddling
// the boundary.
1528 fn test_starts_with() {
1529 assert!(("".starts_with("")));
1530 assert!(("abc".starts_with("")));
1531 assert!(("abc".starts_with("a")));
1532 assert!((!"a".starts_with("abc")));
1533 assert!((!"".starts_with("abc")));
1534 assert!((!"ödd".starts_with("-")));
1535 assert!(("ödd".starts_with("öd")));
1539 fn test_ends_with() {
1540 assert!(("".ends_with("")));
1541 assert!(("abc".ends_with("")));
1542 assert!(("abc".ends_with("c")));
1543 assert!((!"a".ends_with("abc")));
1544 assert!((!"".ends_with("abc")));
1545 assert!((!"ddö".ends_with("-")));
1546 assert!(("ddö".ends_with("dö")));
1550 fn test_is_empty() {
1551 assert!("".is_empty());
1552 assert!(!"a".is_empty());
// `replace` substitutes every occurrence; the bindings `a` and `test` are
// defined on lines this listing dropped (presumably "a" and "test").
1558 assert_eq!("".replace(a, "b"), String::from_str(""));
1559 assert_eq!("a".replace(a, "b"), String::from_str("b"));
1560 assert_eq!("ab".replace(a, "b"), String::from_str("bb"));
1562 assert!(" test test ".replace(test, "toast") ==
1563 String::from_str(" toast toast "));
1564 assert_eq!(" test test ".replace(test, ""), String::from_str(" "));
// replace() with multi-byte needles at the start/middle/end of the
// haystack, and a non-matching needle (2d). The needle bindings `a`, `b`,
// `c`, `d` are defined on lines absent from this listing.
1568 fn test_replace_2a() {
1569 let data = "ประเทศไทย中华";
1570 let repl = "دولة الكويت";
// Needle at the very start: result begins with the Arabic replacement.
1573 let a2 = "دولة الكويتทศไทย中华";
1574 assert_eq!(data.replace(a, repl), a2);
1578 fn test_replace_2b() {
1579 let data = "ประเทศไทย中华";
1580 let repl = "دولة الكويت";
// Needle in the middle.
1583 let b2 = "ปรدولة الكويتทศไทย中华";
1584 assert_eq!(data.replace(b, repl), b2);
1588 fn test_replace_2c() {
1589 let data = "ประเทศไทย中华";
1590 let repl = "دولة الكويت";
// Needle at the end.
1593 let c2 = "ประเทศไทยدولة الكويت";
1594 assert_eq!(data.replace(c, repl), c2);
1598 fn test_replace_2d() {
1599 let data = "ประเทศไทย中华";
1600 let repl = "دولة الكويت";
// No occurrence: the input is returned unchanged.
1603 assert_eq!(data.replace(d, repl), data);
// `slice(begin, end)` takes BYTE offsets which must fall on char
// boundaries; Thai chars are 3 bytes each, hence the multiples of 3.
1608 assert_eq!("ab", "abc".slice(0, 2));
1609 assert_eq!("bc", "abc".slice(1, 3));
1610 assert_eq!("", "abc".slice(1, 1));
1611 assert_eq!("\u{65e5}", "\u{65e5}\u{672c}".slice(0, 3));
1613 let data = "ประเทศไทย中华";
1614 assert_eq!("ป", data.slice(0, 3));
1615 assert_eq!("ร", data.slice(3, 6));
1616 assert_eq!("", data.slice(3, 3));
1617 assert_eq!("华", data.slice(30, 33));
// Large-input check mirroring test_unsafe_slice, with 3-byte '华' chars
// (loop lines absent from this listing).
1619 fn a_million_letter_x() -> String {
1621 let mut rs = String::new();
1623 rs.push_str("华华华华华华华华华华");
1628 fn half_a_million_letter_x() -> String {
1630 let mut rs = String::new();
1632 rs.push_str("华华华华华");
1637 let letters = a_million_letter_x();
1638 assert!(half_a_million_letter_x() ==
1639 String::from_str(letters.slice(0u, 3u * 500000u)));
// More byte-offset slicing on mixed-width text.
1644 let ss = "中华Việt Nam";
1646 assert_eq!("华", ss.slice(3u, 6u));
1647 assert_eq!("Việt Nam", ss.slice(6u, 16u));
1649 assert_eq!("ab", "abc".slice(0u, 2u));
1650 assert_eq!("bc", "abc".slice(1u, 3u));
1651 assert_eq!("", "abc".slice(1u, 1u));
1653 assert_eq!("中", ss.slice(0u, 3u));
1654 assert_eq!("华V", ss.slice(3u, 7u));
1655 assert_eq!("", ss.slice(3u, 3u));
// Slicing to byte 2 lands inside the 3-byte '中' — must panic (the
// #[should_fail] attribute is on a line this listing dropped).
1670 fn test_slice_fail() {
1671 "中华Việt Nam".slice(0u, 2u);
// slice_from / slice_to are the one-sided variants.
1675 fn test_slice_from() {
1676 assert_eq!("abcd".slice_from(0), "abcd");
1677 assert_eq!("abcd".slice_from(2), "cd");
1678 assert_eq!("abcd".slice_from(4), "");
1681 fn test_slice_to() {
1682 assert_eq!("abcd".slice_to(0), "");
1683 assert_eq!("abcd".slice_to(2), "ab");
1684 assert_eq!("abcd".slice_to(4), "abcd");
// trim_*_matches accept three pattern kinds: a char, a &[char] set, and a
// char predicate closure. An EMPTY char set matches nothing, so the input
// comes back untouched.
1688 fn test_trim_left_matches() {
1689 let v: &[char] = &[];
1690 assert_eq!(" *** foo *** ".trim_left_matches(v), " *** foo *** ");
1691 let chars: &[char] = &['*', ' '];
1692 assert_eq!(" *** foo *** ".trim_left_matches(chars), "foo *** ");
1693 assert_eq!(" *** *** ".trim_left_matches(chars), "");
1694 assert_eq!("foo *** ".trim_left_matches(chars), "foo *** ");
1696 assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
1697 let chars: &[char] = &['1', '2'];
1698 assert_eq!("12foo1bar12".trim_left_matches(chars), "foo1bar12");
1699 assert_eq!("123foo1bar123".trim_left_matches(|&: c: char| c.is_numeric()), "foo1bar123");
1703 fn test_trim_right_matches() {
1704 let v: &[char] = &[];
1705 assert_eq!(" *** foo *** ".trim_right_matches(v), " *** foo *** ");
1706 let chars: &[char] = &['*', ' '];
1707 assert_eq!(" *** foo *** ".trim_right_matches(chars), " *** foo");
1708 assert_eq!(" *** *** ".trim_right_matches(chars), "");
1709 assert_eq!(" *** foo".trim_right_matches(chars), " *** foo");
1711 assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
1712 let chars: &[char] = &['1', '2'];
1713 assert_eq!("12foo1bar12".trim_right_matches(chars), "12foo1bar");
1714 assert_eq!("123foo1bar123".trim_right_matches(|&: c: char| c.is_numeric()), "123foo1bar");
// trim_matches strips from both ends but never from the interior.
1718 fn test_trim_matches() {
1719 let v: &[char] = &[];
1720 assert_eq!(" *** foo *** ".trim_matches(v), " *** foo *** ");
1721 let chars: &[char] = &['*', ' '];
1722 assert_eq!(" *** foo *** ".trim_matches(chars), "foo");
1723 assert_eq!(" *** *** ".trim_matches(chars), "");
1724 assert_eq!("foo".trim_matches(chars), "foo");
1726 assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
1727 let chars: &[char] = &['1', '2'];
1728 assert_eq!("12foo1bar12".trim_matches(chars), "foo1bar");
1729 assert_eq!("123foo1bar123".trim_matches(|&: c: char| c.is_numeric()), "foo1bar");
// Whitespace trimming includes non-ASCII whitespace such as U+3000
// (ideographic space).
1733 fn test_trim_left() {
1734 assert_eq!("".trim_left(), "");
1735 assert_eq!("a".trim_left(), "a");
1736 assert_eq!(" ".trim_left(), "");
1737 assert_eq!(" blah".trim_left(), "blah");
1738 assert_eq!(" \u{3000} wut".trim_left(), "wut");
1739 assert_eq!("hey ".trim_left(), "hey ");
1743 fn test_trim_right() {
1744 assert_eq!("".trim_right(), "");
1745 assert_eq!("a".trim_right(), "a");
1746 assert_eq!(" ".trim_right(), "");
1747 assert_eq!("blah ".trim_right(), "blah");
1748 assert_eq!("wut \u{3000} ".trim_right(), "wut");
1749 assert_eq!(" hey".trim_right(), " hey");
1754 assert_eq!("".trim(), "");
1755 assert_eq!("a".trim(), "a");
1756 assert_eq!(" ".trim(), "");
1757 assert_eq!(" blah ".trim(), "blah");
1758 assert_eq!("\nwut \u{3000} ".trim(), "wut");
1759 assert_eq!(" hey dude ".trim(), "hey dude");
// Whitespace classification includes Unicode spaces (U+2009 thin space);
// note all() on an empty iterator is vacuously true for "".
1763 fn test_is_whitespace() {
1764 assert!("".chars().all(|c| c.is_whitespace()));
1765 assert!(" ".chars().all(|c| c.is_whitespace()));
1766 assert!("\u{2009}".chars().all(|c| c.is_whitespace())); // Thin space
1767 assert!(" \n\t ".chars().all(|c| c.is_whitespace()));
1768 assert!(!" _ ".chars().all(|c| c.is_whitespace()));
// slice_shift_char splits off the first char, returning (char, rest);
// None on the empty string.
1772 fn test_slice_shift_char() {
1773 let data = "ประเทศไทย中";
1774 assert_eq!(data.slice_shift_char(), Some(('ป', "ระเทศไทย中")));
1778 fn test_slice_shift_char_2() {
1780 assert_eq!(empty.slice_shift_char(), None);
// UTF-8 validation: reject overlong encodings, surrogate code points
// (U+D800..U+DFFF), and values above U+10FFFF.
1785 // deny overlong encodings
1786 assert!(from_utf8(&[0xc0, 0x80]).is_err());
1787 assert!(from_utf8(&[0xc0, 0xae]).is_err());
1788 assert!(from_utf8(&[0xe0, 0x80, 0x80]).is_err());
1789 assert!(from_utf8(&[0xe0, 0x80, 0xaf]).is_err());
1790 assert!(from_utf8(&[0xe0, 0x81, 0x81]).is_err());
1791 assert!(from_utf8(&[0xf0, 0x82, 0x82, 0xac]).is_err());
1792 assert!(from_utf8(&[0xf4, 0x90, 0x80, 0x80]).is_err());
1795 assert!(from_utf8(&[0xED, 0xA0, 0x80]).is_err());
1796 assert!(from_utf8(&[0xED, 0xBF, 0xBF]).is_err());
// Boundary-valid sequences: smallest/largest 2-, 3-, and 4-byte encodings,
// plus the scalars adjacent to the surrogate gap.
1798 assert!(from_utf8(&[0xC2, 0x80]).is_ok());
1799 assert!(from_utf8(&[0xDF, 0xBF]).is_ok());
1800 assert!(from_utf8(&[0xE0, 0xA0, 0x80]).is_ok());
1801 assert!(from_utf8(&[0xED, 0x9F, 0xBF]).is_ok());
1802 assert!(from_utf8(&[0xEE, 0x80, 0x80]).is_ok());
1803 assert!(from_utf8(&[0xEF, 0xBF, 0xBF]).is_ok());
1804 assert!(from_utf8(&[0xF0, 0x90, 0x80, 0x80]).is_ok());
1805 assert!(from_utf8(&[0xF4, 0x8F, 0xBF, 0xBF]).is_ok());
// UTF-16 validation: `pos!`/`neg!` assert is_utf16 over lists of u16
// buffers. Valid data pairs every lead surrogate (0xD800-0xDBFF) with a
// trail (0xDC00-0xDFFF); the neg! cases break that pairing.
1809 fn test_is_utf16() {
1810 use unicode::str::is_utf16;
1812 ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } }
1821 // surrogate pairs (randomly generated with Python 3's
1822 // .encode('utf-16be'))
1823 pos!(&[0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
1824 &[0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
1825 &[0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);
1827 // mixtures (also random)
1828 pos!(&[0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
1829 &[0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
1830 &[0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);
// neg! mirrors pos! with a negated assertion (macro body line 1834).
1834 ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } }
1838 // surrogate + regular unit
1840 // surrogate + lead surrogate
1842 // unterminated surrogate
1844 // trail surrogate without a lead
1847 // random byte sequences that Python 3's .decode('utf-16be')
1849 neg!(&[0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
1850 &[0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
1851 &[0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
1852 &[0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
1853 &[0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
1854 &[0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
1855 &[0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
1856 &[0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
1857 &[0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
1858 &[0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
1859 &[0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
1860 &[0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
1861 &[0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
1862 &[0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
1863 &[0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
1864 &[0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
1865 &[0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
1866 &[0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
1867 &[0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
1868 &[0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
1869 &[0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
// as_bytes views the UTF-8 representation; `v` (bindings on dropped lines)
// is the expected byte vector for the Thai/CJK/Vietnamese sample string.
1873 fn test_as_bytes() {
1876 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1877 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1881 assert_eq!("".as_bytes(), b);
1882 assert_eq!("abc".as_bytes(), b"abc");
1883 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
// Historical regression test: taking as_bytes must not cause a double
// free (the #[should_fail]/panic mechanics live on dropped lines).
1888 fn test_as_bytes_fail() {
1889 // Don't double free. (I'm not sure if this exercises the
1890 // original problem code path anymore.)
1891 let s = String::from_str("");
1892 let _bytes = s.as_bytes();
// as_ptr gives a raw pointer to the UTF-8 buffer; reads are unsafe
// (the enclosing unsafe block is on a dropped line — TODO confirm).
1898 let buf = "hello".as_ptr();
1900 assert_eq!(*buf.offset(0), b'h');
1901 assert_eq!(*buf.offset(1), b'e');
1902 assert_eq!(*buf.offset(2), b'l');
1903 assert_eq!(*buf.offset(3), b'l');
1904 assert_eq!(*buf.offset(4), b'o');
// subslice_offset reports a subslice's byte offset within its parent...
1909 fn test_subslice_offset() {
1910 let a = "kernelsprite";
1911 let b = a.slice(7, a.len());
1912 let c = a.slice(0, a.len() - 6);
1913 assert_eq!(a.subslice_offset(b), 7);
1914 assert_eq!(a.subslice_offset(c), 0);
1916 let string = "a\nb\nc";
1917 let lines: Vec<&str> = string.lines().collect();
1918 assert_eq!(string.subslice_offset(lines[0]), 0);
1919 assert_eq!(string.subslice_offset(lines[1]), 2);
1920 assert_eq!(string.subslice_offset(lines[2]), 4);
// ...and must panic when the argument is NOT a subslice of the receiver
// (the #[should_fail] attribute is on a dropped line).
1925 fn test_subslice_offset_2() {
1926 let a = "alchemiter";
1927 let b = "cruxtruder";
1928 a.subslice_offset(b);
// Round-trip String -> bytes -> String, then compare byte-by-byte
// (the loop header and tail live on dropped lines).
1932 fn vec_str_conversions() {
1933 let s1: String = String::from_str("All mimsy were the borogoves");
1935 let v: Vec<u8> = s1.as_bytes().to_vec();
1936 let s2: String = String::from_str(from_utf8(v.as_slice()).unwrap());
1937 let mut i: uint = 0u;
1938 let n1: uint = s1.len();
1939 let n2: uint = v.len();
1942 let a: u8 = s1.as_bytes()[i];
1943 let b: u8 = s2.as_bytes()[i];
// Substring containment, including the always-true empty-needle cases.
1952 fn test_contains() {
1953 assert!("abcde".contains("bcd"));
1954 assert!("abcde".contains("abcd"));
1955 assert!("abcde".contains("bcde"));
1956 assert!("abcde".contains(""));
1957 assert!("".contains(""));
1958 assert!(!"abcde".contains("def"));
1959 assert!(!"".contains("a"));
1961 let data = "ประเทศไทย中华Việt Nam";
1962 assert!(data.contains("ประเ"));
1963 assert!(data.contains("ะเ"));
1964 assert!(data.contains("中华"));
1965 assert!(!data.contains("ไท华"));
1969 fn test_contains_char() {
1970 assert!("abc".contains_char('b'));
1971 assert!("a".contains_char('a'));
1972 assert!(!"abc".contains_char('d'));
1973 assert!(!"".contains_char('a'));
// char_at(byte_pos): walk forward advancing pos by each char's UTF-8 width.
1978 let s = "ศไทย中华Việt Nam";
1979 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1981 for ch in v.iter() {
1982 assert!(s.char_at(pos) == *ch);
1983 pos += ch.to_string().len();
// char_at_reverse(byte_pos): same walk from the end; pos starts at s.len().
1988 fn test_char_at_reverse() {
1989 let s = "ศไทย中华Việt Nam";
1990 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1991 let mut pos = s.len();
1992 for ch in v.iter().rev() {
1993 assert!(s.char_at_reverse(pos) == *ch);
1994 pos -= ch.to_string().len();
// escape_unicode renders EVERY char as \u{...}; escape_default keeps
// printable ASCII literal and escapes the rest.
1999 fn test_escape_unicode() {
2000 assert_eq!("abc".escape_unicode(),
2001 String::from_str("\\u{61}\\u{62}\\u{63}"));
2002 assert_eq!("a c".escape_unicode(),
2003 String::from_str("\\u{61}\\u{20}\\u{63}"));
2004 assert_eq!("\r\n\t".escape_unicode(),
2005 String::from_str("\\u{d}\\u{a}\\u{9}"));
2006 assert_eq!("'\"\\".escape_unicode(),
2007 String::from_str("\\u{27}\\u{22}\\u{5c}"));
2008 assert_eq!("\x00\x01\u{fe}\u{ff}".escape_unicode(),
2009 String::from_str("\\u{0}\\u{1}\\u{fe}\\u{ff}"));
2010 assert_eq!("\u{100}\u{ffff}".escape_unicode(),
2011 String::from_str("\\u{100}\\u{ffff}"));
2012 assert_eq!("\u{10000}\u{10ffff}".escape_unicode(),
2013 String::from_str("\\u{10000}\\u{10ffff}"));
2014 assert_eq!("ab\u{fb00}".escape_unicode(),
2015 String::from_str("\\u{61}\\u{62}\\u{fb00}"));
2016 assert_eq!("\u{1d4ea}\r".escape_unicode(),
2017 String::from_str("\\u{1d4ea}\\u{d}"));
2021 fn test_escape_default() {
2022 assert_eq!("abc".escape_default(), String::from_str("abc"));
2023 assert_eq!("a c".escape_default(), String::from_str("a c"));
2024 assert_eq!("\r\n\t".escape_default(), String::from_str("\\r\\n\\t"));
2025 assert_eq!("'\"\\".escape_default(), String::from_str("\\'\\\"\\\\"));
2026 assert_eq!("\u{100}\u{ffff}".escape_default(),
2027 String::from_str("\\u{100}\\u{ffff}"));
2028 assert_eq!("\u{10000}\u{10ffff}".escape_default(),
2029 String::from_str("\\u{10000}\\u{10ffff}"));
2030 assert_eq!("ab\u{fb00}".escape_default(),
2031 String::from_str("ab\\u{fb00}"));
2032 assert_eq!("\u{1d4ea}\r".escape_default(),
2033 String::from_str("\\u{1d4ea}\\r"));
// NOTE(review): these comparisons discard their results (no assert!), so
// the test only checks that cmp compiles/runs — the `==` outcomes are never
// verified. Cannot change in a comments-only edit; flagging for follow-up.
2037 fn test_total_ord() {
2038 "1234".cmp("123") == Greater;
2039 "123".cmp("1234") == Less;
2040 "1234".cmp("1234") == Equal;
2041 "12345555".cmp("123456") == Less;
2042 "22".cmp("1234") == Greater;
// char_range_at(i).ch decodes the char starting at byte i; the palindrome
// "b¢€𤭢𤭢€¢b" exercises 1-, 2-, 3- and 4-byte encodings (offsets
// 0,1,3,6,10,14,17,19).
2046 fn test_char_range_at() {
2047 let data = "b¢€𤭢𤭢€¢b";
2048 assert_eq!('b', data.char_range_at(0).ch);
2049 assert_eq!('¢', data.char_range_at(1).ch);
2050 assert_eq!('€', data.char_range_at(3).ch);
2051 assert_eq!('𤭢', data.char_range_at(6).ch);
2052 assert_eq!('𤭢', data.char_range_at(10).ch);
2053 assert_eq!('€', data.char_range_at(14).ch);
2054 assert_eq!('¢', data.char_range_at(17).ch);
2055 assert_eq!('b', data.char_range_at(19).ch);
// At offset 0 there is nothing before: .next must stay 0, not underflow.
2059 fn test_char_range_at_reverse_underflow() {
2060 assert_eq!("abc".char_range_at_reverse(0).next, 0);
// chars() forward and reversed against hand-written expectations (the
// per-iteration pos += 1 lines are absent from this listing).
2064 fn test_iterator() {
2065 let s = "ศไทย中华Việt Nam";
2066 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2069 let mut it = s.chars();
2072 assert_eq!(c, v[pos]);
2075 assert_eq!(pos, v.len());
2079 fn test_rev_iterator() {
2080 let s = "ศไทย中华Việt Nam";
2081 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2084 let mut it = s.chars().rev();
2087 assert_eq!(c, v[pos]);
2090 assert_eq!(pos, v.len());
// Exhaustive decode check: every Unicode scalar value (surrogates are
// filtered out by char::from_u32 returning None) encodes to UTF-8 and
// decodes back to itself via chars().
2094 fn test_chars_decoding() {
2095 let mut bytes = [0u8; 4];
2096 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2097 let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2098 let s = ::core::str::from_utf8(&bytes[..len]).unwrap();
2099 if Some(c) != s.chars().next() {
2100 panic!("character {:x}={} does not decode correctly", c as u32, c);
// Same, via the reverse iterator (each test string is a single char, so
// rev().next() must yield it too).
2106 fn test_chars_rev_decoding() {
2107 let mut bytes = [0u8; 4];
2108 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2109 let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2110 let s = ::core::str::from_utf8(&bytes[..len]).unwrap();
2111 if Some(c) != s.chars().rev().next() {
2112 panic!("character {:x}={} does not decode correctly", c as u32, c);
// A cloned Chars iterator must yield the same sequence as the original.
2118 fn test_iterator_clone() {
2119 let s = "ศไทย中华Việt Nam";
2120 let mut it = s.chars();
2122 assert!(it.zip(it.clone()).all(|(x,y)| x == y));
// bytes() forward/reverse against the raw UTF-8 (the Vec binding and loop
// counters are on dropped lines).
2126 fn test_bytesator() {
2127 let s = "ศไทย中华Việt Nam";
2129 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2130 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2135 for b in s.bytes() {
2136 assert_eq!(b, v[pos]);
2142 fn test_bytes_revator() {
2143 let s = "ศไทย中华Việt Nam";
2145 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2146 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2149 let mut pos = v.len();
2151 for b in s.bytes().rev() {
2153 assert_eq!(b, v[pos]);
// char_indices(): (byte offset, char) pairs, forward then reversed; `p`
// holds the expected byte offsets for the mixed-width sample string.
2158 fn test_char_indicesator() {
2159 let s = "ศไทย中华Việt Nam";
2160 let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
2161 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2164 let mut it = s.char_indices();
2167 assert_eq!(c, (p[pos], v[pos]));
2170 assert_eq!(pos, v.len());
2171 assert_eq!(pos, p.len());
2175 fn test_char_indices_revator() {
2176 let s = "ศไทย中华Việt Nam";
2177 let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
2178 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2181 let mut it = s.char_indices().rev();
2184 assert_eq!(c, (p[pos], v[pos]));
2187 assert_eq!(pos, v.len());
2188 assert_eq!(pos, p.len());
// splitn(3, pat): at most 3 splits, i.e. 4 pieces, with the remainder kept
// intact in the final piece; char and closure patterns behave identically.
2192 fn test_splitn_char_iterator() {
2193 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2195 let split: Vec<&str> = data.splitn(3, ' ').collect();
2196 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2198 let split: Vec<&str> = data.splitn(3, |&: c: char| c == ' ').collect();
2199 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2202 let split: Vec<&str> = data.splitn(3, 'ä').collect();
2203 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
2205 let split: Vec<&str> = data.splitn(3, |&: c: char| c == 'ä').collect();
2206 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
// split keeps the trailing empty field; split_terminator drops it.
2210 fn test_split_char_iterator_no_trailing() {
2211 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2213 let split: Vec<&str> = data.split('\n').collect();
2214 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
2216 let split: Vec<&str> = data.split_terminator('\n').collect();
2217 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
// words(): runs of whitespace (space, tab, newline) collapse; no empties.
2222 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
2223 let words: Vec<&str> = data.words().collect();
2224 assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
// Unicode normalization tests (UAX #15). Each fn defines a local t! macro
// asserting input.nfX_chars().collect::<String>() == expected. Shared cases:
// U+2126 OHM SIGN -> U+03A9 GREEK OMEGA, combining-mark reordering
// (d + dot-below + dot-above), and Hangul composition/decomposition.
2228 fn test_nfd_chars() {
2230 ($input: expr, $expected: expr) => {
2231 assert_eq!($input.nfd_chars().collect::<String>(), $expected);
2235 t!("\u{1e0b}\u{1c4}", "d\u{307}\u{1c4}");
2236 t!("\u{2026}", "\u{2026}");
2237 t!("\u{2126}", "\u{3a9}");
2238 t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
2239 t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
2240 t!("a\u{301}", "a\u{301}");
2241 t!("\u{301}a", "\u{301}a");
2242 t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
2243 t!("\u{ac1c}", "\u{1100}\u{1162}");
// NFKD additionally applies compatibility mappings: U+01C4 DZ-caron and
// U+2026 ellipsis decompose here, unlike under NFD.
2247 fn test_nfkd_chars() {
2249 ($input: expr, $expected: expr) => {
2250 assert_eq!($input.nfkd_chars().collect::<String>(), $expected);
2254 t!("\u{1e0b}\u{1c4}", "d\u{307}DZ\u{30c}");
2255 t!("\u{2026}", "...");
2256 t!("\u{2126}", "\u{3a9}");
2257 t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
2258 t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
2259 t!("a\u{301}", "a\u{301}");
2260 t!("\u{301}a", "\u{301}a");
2261 t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
2262 t!("\u{ac1c}", "\u{1100}\u{1162}");
// NFC recomposes after canonical decomposition: a+acute -> U+00E1, Hangul
// jamo stay precomposed; a leading combining mark cannot compose.
2266 fn test_nfc_chars() {
2268 ($input: expr, $expected: expr) => {
2269 assert_eq!($input.nfc_chars().collect::<String>(), $expected);
2273 t!("\u{1e0b}\u{1c4}", "\u{1e0b}\u{1c4}");
2274 t!("\u{2026}", "\u{2026}");
2275 t!("\u{2126}", "\u{3a9}");
2276 t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
2277 t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
2278 t!("a\u{301}", "\u{e1}");
2279 t!("\u{301}a", "\u{301}a");
2280 t!("\u{d4db}", "\u{d4db}");
2281 t!("\u{ac1c}", "\u{ac1c}");
2282 t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
// NFKC = compatibility decomposition + canonical composition.
2286 fn test_nfkc_chars() {
2288 ($input: expr, $expected: expr) => {
2289 assert_eq!($input.nfkc_chars().collect::<String>(), $expected);
2293 t!("\u{1e0b}\u{1c4}", "\u{1e0b}D\u{17d}");
2294 t!("\u{2026}", "...");
2295 t!("\u{2126}", "\u{3a9}");
2296 t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
2297 t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
2298 t!("a\u{301}", "\u{e1}");
2299 t!("\u{301}a", "\u{301}a");
2300 t!("\u{d4db}", "\u{d4db}");
2301 t!("\u{ac1c}", "\u{ac1c}");
2302 t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
// lines(): leading '\n' yields a leading "", a trailing '\n' yields NO
// trailing "" — both inputs (with and without final newline) give the
// same four lines.
2307 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
2308 let lines: Vec<&str> = data.lines().collect();
2309 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2311 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
2312 let lines: Vec<&str> = data.lines().collect();
2313 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2317 fn test_graphemes() {
2318 use core::iter::order;
2319 // official Unicode test data
2320 // from http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
2321 let test_same: [(_, &[_]); 325] = [
2322 ("\u{20}\u{20}", &["\u{20}", "\u{20}"]),
2323 ("\u{20}\u{308}\u{20}", &["\u{20}\u{308}", "\u{20}"]),
2324 ("\u{20}\u{D}", &["\u{20}", "\u{D}"]),
2325 ("\u{20}\u{308}\u{D}", &["\u{20}\u{308}", "\u{D}"]),
2326 ("\u{20}\u{A}", &["\u{20}", "\u{A}"]),
2327 ("\u{20}\u{308}\u{A}", &["\u{20}\u{308}", "\u{A}"]),
2328 ("\u{20}\u{1}", &["\u{20}", "\u{1}"]),
2329 ("\u{20}\u{308}\u{1}", &["\u{20}\u{308}", "\u{1}"]),
2330 ("\u{20}\u{300}", &["\u{20}\u{300}"]),
2331 ("\u{20}\u{308}\u{300}", &["\u{20}\u{308}\u{300}"]),
2332 ("\u{20}\u{1100}", &["\u{20}", "\u{1100}"]),
2333 ("\u{20}\u{308}\u{1100}", &["\u{20}\u{308}", "\u{1100}"]),
2334 ("\u{20}\u{1160}", &["\u{20}", "\u{1160}"]),
2335 ("\u{20}\u{308}\u{1160}", &["\u{20}\u{308}", "\u{1160}"]),
2336 ("\u{20}\u{11A8}", &["\u{20}", "\u{11A8}"]),
2337 ("\u{20}\u{308}\u{11A8}", &["\u{20}\u{308}", "\u{11A8}"]),
2338 ("\u{20}\u{AC00}", &["\u{20}", "\u{AC00}"]),
2339 ("\u{20}\u{308}\u{AC00}", &["\u{20}\u{308}", "\u{AC00}"]),
2340 ("\u{20}\u{AC01}", &["\u{20}", "\u{AC01}"]),
2341 ("\u{20}\u{308}\u{AC01}", &["\u{20}\u{308}", "\u{AC01}"]),
2342 ("\u{20}\u{1F1E6}", &["\u{20}", "\u{1F1E6}"]),
2343 ("\u{20}\u{308}\u{1F1E6}", &["\u{20}\u{308}", "\u{1F1E6}"]),
2344 ("\u{20}\u{378}", &["\u{20}", "\u{378}"]),
2345 ("\u{20}\u{308}\u{378}", &["\u{20}\u{308}", "\u{378}"]),
2346 ("\u{D}\u{20}", &["\u{D}", "\u{20}"]),
2347 ("\u{D}\u{308}\u{20}", &["\u{D}", "\u{308}", "\u{20}"]),
2348 ("\u{D}\u{D}", &["\u{D}", "\u{D}"]),
2349 ("\u{D}\u{308}\u{D}", &["\u{D}", "\u{308}", "\u{D}"]),
2350 ("\u{D}\u{A}", &["\u{D}\u{A}"]),
2351 ("\u{D}\u{308}\u{A}", &["\u{D}", "\u{308}", "\u{A}"]),
2352 ("\u{D}\u{1}", &["\u{D}", "\u{1}"]),
2353 ("\u{D}\u{308}\u{1}", &["\u{D}", "\u{308}", "\u{1}"]),
2354 ("\u{D}\u{300}", &["\u{D}", "\u{300}"]),
2355 ("\u{D}\u{308}\u{300}", &["\u{D}", "\u{308}\u{300}"]),
2356 ("\u{D}\u{903}", &["\u{D}", "\u{903}"]),
2357 ("\u{D}\u{1100}", &["\u{D}", "\u{1100}"]),
2358 ("\u{D}\u{308}\u{1100}", &["\u{D}", "\u{308}", "\u{1100}"]),
2359 ("\u{D}\u{1160}", &["\u{D}", "\u{1160}"]),
2360 ("\u{D}\u{308}\u{1160}", &["\u{D}", "\u{308}", "\u{1160}"]),
2361 ("\u{D}\u{11A8}", &["\u{D}", "\u{11A8}"]),
2362 ("\u{D}\u{308}\u{11A8}", &["\u{D}", "\u{308}", "\u{11A8}"]),
2363 ("\u{D}\u{AC00}", &["\u{D}", "\u{AC00}"]),
2364 ("\u{D}\u{308}\u{AC00}", &["\u{D}", "\u{308}", "\u{AC00}"]),
2365 ("\u{D}\u{AC01}", &["\u{D}", "\u{AC01}"]),
2366 ("\u{D}\u{308}\u{AC01}", &["\u{D}", "\u{308}", "\u{AC01}"]),
2367 ("\u{D}\u{1F1E6}", &["\u{D}", "\u{1F1E6}"]),
2368 ("\u{D}\u{308}\u{1F1E6}", &["\u{D}", "\u{308}", "\u{1F1E6}"]),
2369 ("\u{D}\u{378}", &["\u{D}", "\u{378}"]),
2370 ("\u{D}\u{308}\u{378}", &["\u{D}", "\u{308}", "\u{378}"]),
2371 ("\u{A}\u{20}", &["\u{A}", "\u{20}"]),
2372 ("\u{A}\u{308}\u{20}", &["\u{A}", "\u{308}", "\u{20}"]),
2373 ("\u{A}\u{D}", &["\u{A}", "\u{D}"]),
2374 ("\u{A}\u{308}\u{D}", &["\u{A}", "\u{308}", "\u{D}"]),
2375 ("\u{A}\u{A}", &["\u{A}", "\u{A}"]),
2376 ("\u{A}\u{308}\u{A}", &["\u{A}", "\u{308}", "\u{A}"]),
2377 ("\u{A}\u{1}", &["\u{A}", "\u{1}"]),
2378 ("\u{A}\u{308}\u{1}", &["\u{A}", "\u{308}", "\u{1}"]),
2379 ("\u{A}\u{300}", &["\u{A}", "\u{300}"]),
2380 ("\u{A}\u{308}\u{300}", &["\u{A}", "\u{308}\u{300}"]),
2381 ("\u{A}\u{903}", &["\u{A}", "\u{903}"]),
2382 ("\u{A}\u{1100}", &["\u{A}", "\u{1100}"]),
2383 ("\u{A}\u{308}\u{1100}", &["\u{A}", "\u{308}", "\u{1100}"]),
2384 ("\u{A}\u{1160}", &["\u{A}", "\u{1160}"]),
2385 ("\u{A}\u{308}\u{1160}", &["\u{A}", "\u{308}", "\u{1160}"]),
2386 ("\u{A}\u{11A8}", &["\u{A}", "\u{11A8}"]),
2387 ("\u{A}\u{308}\u{11A8}", &["\u{A}", "\u{308}", "\u{11A8}"]),
2388 ("\u{A}\u{AC00}", &["\u{A}", "\u{AC00}"]),
2389 ("\u{A}\u{308}\u{AC00}", &["\u{A}", "\u{308}", "\u{AC00}"]),
2390 ("\u{A}\u{AC01}", &["\u{A}", "\u{AC01}"]),
2391 ("\u{A}\u{308}\u{AC01}", &["\u{A}", "\u{308}", "\u{AC01}"]),
2392 ("\u{A}\u{1F1E6}", &["\u{A}", "\u{1F1E6}"]),
2393 ("\u{A}\u{308}\u{1F1E6}", &["\u{A}", "\u{308}", "\u{1F1E6}"]),
2394 ("\u{A}\u{378}", &["\u{A}", "\u{378}"]),
2395 ("\u{A}\u{308}\u{378}", &["\u{A}", "\u{308}", "\u{378}"]),
2396 ("\u{1}\u{20}", &["\u{1}", "\u{20}"]),
2397 ("\u{1}\u{308}\u{20}", &["\u{1}", "\u{308}", "\u{20}"]),
2398 ("\u{1}\u{D}", &["\u{1}", "\u{D}"]),
2399 ("\u{1}\u{308}\u{D}", &["\u{1}", "\u{308}", "\u{D}"]),
2400 ("\u{1}\u{A}", &["\u{1}", "\u{A}"]),
2401 ("\u{1}\u{308}\u{A}", &["\u{1}", "\u{308}", "\u{A}"]),
2402 ("\u{1}\u{1}", &["\u{1}", "\u{1}"]),
2403 ("\u{1}\u{308}\u{1}", &["\u{1}", "\u{308}", "\u{1}"]),
2404 ("\u{1}\u{300}", &["\u{1}", "\u{300}"]),
2405 ("\u{1}\u{308}\u{300}", &["\u{1}", "\u{308}\u{300}"]),
2406 ("\u{1}\u{903}", &["\u{1}", "\u{903}"]),
2407 ("\u{1}\u{1100}", &["\u{1}", "\u{1100}"]),
2408 ("\u{1}\u{308}\u{1100}", &["\u{1}", "\u{308}", "\u{1100}"]),
2409 ("\u{1}\u{1160}", &["\u{1}", "\u{1160}"]),
2410 ("\u{1}\u{308}\u{1160}", &["\u{1}", "\u{308}", "\u{1160}"]),
2411 ("\u{1}\u{11A8}", &["\u{1}", "\u{11A8}"]),
2412 ("\u{1}\u{308}\u{11A8}", &["\u{1}", "\u{308}", "\u{11A8}"]),
2413 ("\u{1}\u{AC00}", &["\u{1}", "\u{AC00}"]),
2414 ("\u{1}\u{308}\u{AC00}", &["\u{1}", "\u{308}", "\u{AC00}"]),
2415 ("\u{1}\u{AC01}", &["\u{1}", "\u{AC01}"]),
2416 ("\u{1}\u{308}\u{AC01}", &["\u{1}", "\u{308}", "\u{AC01}"]),
2417 ("\u{1}\u{1F1E6}", &["\u{1}", "\u{1F1E6}"]),
2418 ("\u{1}\u{308}\u{1F1E6}", &["\u{1}", "\u{308}", "\u{1F1E6}"]),
2419 ("\u{1}\u{378}", &["\u{1}", "\u{378}"]),
2420 ("\u{1}\u{308}\u{378}", &["\u{1}", "\u{308}", "\u{378}"]),
2421 ("\u{300}\u{20}", &["\u{300}", "\u{20}"]),
2422 ("\u{300}\u{308}\u{20}", &["\u{300}\u{308}", "\u{20}"]),
2423 ("\u{300}\u{D}", &["\u{300}", "\u{D}"]),
2424 ("\u{300}\u{308}\u{D}", &["\u{300}\u{308}", "\u{D}"]),
2425 ("\u{300}\u{A}", &["\u{300}", "\u{A}"]),
2426 ("\u{300}\u{308}\u{A}", &["\u{300}\u{308}", "\u{A}"]),
2427 ("\u{300}\u{1}", &["\u{300}", "\u{1}"]),
2428 ("\u{300}\u{308}\u{1}", &["\u{300}\u{308}", "\u{1}"]),
2429 ("\u{300}\u{300}", &["\u{300}\u{300}"]),
2430 ("\u{300}\u{308}\u{300}", &["\u{300}\u{308}\u{300}"]),
2431 ("\u{300}\u{1100}", &["\u{300}", "\u{1100}"]),
2432 ("\u{300}\u{308}\u{1100}", &["\u{300}\u{308}", "\u{1100}"]),
2433 ("\u{300}\u{1160}", &["\u{300}", "\u{1160}"]),
2434 ("\u{300}\u{308}\u{1160}", &["\u{300}\u{308}", "\u{1160}"]),
2435 ("\u{300}\u{11A8}", &["\u{300}", "\u{11A8}"]),
2436 ("\u{300}\u{308}\u{11A8}", &["\u{300}\u{308}", "\u{11A8}"]),
2437 ("\u{300}\u{AC00}", &["\u{300}", "\u{AC00}"]),
2438 ("\u{300}\u{308}\u{AC00}", &["\u{300}\u{308}", "\u{AC00}"]),
2439 ("\u{300}\u{AC01}", &["\u{300}", "\u{AC01}"]),
2440 ("\u{300}\u{308}\u{AC01}", &["\u{300}\u{308}", "\u{AC01}"]),
2441 ("\u{300}\u{1F1E6}", &["\u{300}", "\u{1F1E6}"]),
2442 ("\u{300}\u{308}\u{1F1E6}", &["\u{300}\u{308}", "\u{1F1E6}"]),
2443 ("\u{300}\u{378}", &["\u{300}", "\u{378}"]),
2444 ("\u{300}\u{308}\u{378}", &["\u{300}\u{308}", "\u{378}"]),
2445 ("\u{903}\u{20}", &["\u{903}", "\u{20}"]),
2446 ("\u{903}\u{308}\u{20}", &["\u{903}\u{308}", "\u{20}"]),
2447 ("\u{903}\u{D}", &["\u{903}", "\u{D}"]),
2448 ("\u{903}\u{308}\u{D}", &["\u{903}\u{308}", "\u{D}"]),
2449 ("\u{903}\u{A}", &["\u{903}", "\u{A}"]),
2450 ("\u{903}\u{308}\u{A}", &["\u{903}\u{308}", "\u{A}"]),
2451 ("\u{903}\u{1}", &["\u{903}", "\u{1}"]),
2452 ("\u{903}\u{308}\u{1}", &["\u{903}\u{308}", "\u{1}"]),
2453 ("\u{903}\u{300}", &["\u{903}\u{300}"]),
2454 ("\u{903}\u{308}\u{300}", &["\u{903}\u{308}\u{300}"]),
2455 ("\u{903}\u{1100}", &["\u{903}", "\u{1100}"]),
2456 ("\u{903}\u{308}\u{1100}", &["\u{903}\u{308}", "\u{1100}"]),
2457 ("\u{903}\u{1160}", &["\u{903}", "\u{1160}"]),
2458 ("\u{903}\u{308}\u{1160}", &["\u{903}\u{308}", "\u{1160}"]),
2459 ("\u{903}\u{11A8}", &["\u{903}", "\u{11A8}"]),
2460 ("\u{903}\u{308}\u{11A8}", &["\u{903}\u{308}", "\u{11A8}"]),
2461 ("\u{903}\u{AC00}", &["\u{903}", "\u{AC00}"]),
2462 ("\u{903}\u{308}\u{AC00}", &["\u{903}\u{308}", "\u{AC00}"]),
2463 ("\u{903}\u{AC01}", &["\u{903}", "\u{AC01}"]),
2464 ("\u{903}\u{308}\u{AC01}", &["\u{903}\u{308}", "\u{AC01}"]),
2465 ("\u{903}\u{1F1E6}", &["\u{903}", "\u{1F1E6}"]),
2466 ("\u{903}\u{308}\u{1F1E6}", &["\u{903}\u{308}", "\u{1F1E6}"]),
2467 ("\u{903}\u{378}", &["\u{903}", "\u{378}"]),
2468 ("\u{903}\u{308}\u{378}", &["\u{903}\u{308}", "\u{378}"]),
2469 ("\u{1100}\u{20}", &["\u{1100}", "\u{20}"]),
2470 ("\u{1100}\u{308}\u{20}", &["\u{1100}\u{308}", "\u{20}"]),
2471 ("\u{1100}\u{D}", &["\u{1100}", "\u{D}"]),
2472 ("\u{1100}\u{308}\u{D}", &["\u{1100}\u{308}", "\u{D}"]),
2473 ("\u{1100}\u{A}", &["\u{1100}", "\u{A}"]),
2474 ("\u{1100}\u{308}\u{A}", &["\u{1100}\u{308}", "\u{A}"]),
2475 ("\u{1100}\u{1}", &["\u{1100}", "\u{1}"]),
2476 ("\u{1100}\u{308}\u{1}", &["\u{1100}\u{308}", "\u{1}"]),
2477 ("\u{1100}\u{300}", &["\u{1100}\u{300}"]),
2478 ("\u{1100}\u{308}\u{300}", &["\u{1100}\u{308}\u{300}"]),
2479 ("\u{1100}\u{1100}", &["\u{1100}\u{1100}"]),
2480 ("\u{1100}\u{308}\u{1100}", &["\u{1100}\u{308}", "\u{1100}"]),
2481 ("\u{1100}\u{1160}", &["\u{1100}\u{1160}"]),
2482 ("\u{1100}\u{308}\u{1160}", &["\u{1100}\u{308}", "\u{1160}"]),
2483 ("\u{1100}\u{11A8}", &["\u{1100}", "\u{11A8}"]),
2484 ("\u{1100}\u{308}\u{11A8}", &["\u{1100}\u{308}", "\u{11A8}"]),
2485 ("\u{1100}\u{AC00}", &["\u{1100}\u{AC00}"]),
2486 ("\u{1100}\u{308}\u{AC00}", &["\u{1100}\u{308}", "\u{AC00}"]),
2487 ("\u{1100}\u{AC01}", &["\u{1100}\u{AC01}"]),
2488 ("\u{1100}\u{308}\u{AC01}", &["\u{1100}\u{308}", "\u{AC01}"]),
2489 ("\u{1100}\u{1F1E6}", &["\u{1100}", "\u{1F1E6}"]),
2490 ("\u{1100}\u{308}\u{1F1E6}", &["\u{1100}\u{308}", "\u{1F1E6}"]),
2491 ("\u{1100}\u{378}", &["\u{1100}", "\u{378}"]),
2492 ("\u{1100}\u{308}\u{378}", &["\u{1100}\u{308}", "\u{378}"]),
2493 ("\u{1160}\u{20}", &["\u{1160}", "\u{20}"]),
2494 ("\u{1160}\u{308}\u{20}", &["\u{1160}\u{308}", "\u{20}"]),
2495 ("\u{1160}\u{D}", &["\u{1160}", "\u{D}"]),
2496 ("\u{1160}\u{308}\u{D}", &["\u{1160}\u{308}", "\u{D}"]),
2497 ("\u{1160}\u{A}", &["\u{1160}", "\u{A}"]),
2498 ("\u{1160}\u{308}\u{A}", &["\u{1160}\u{308}", "\u{A}"]),
2499 ("\u{1160}\u{1}", &["\u{1160}", "\u{1}"]),
2500 ("\u{1160}\u{308}\u{1}", &["\u{1160}\u{308}", "\u{1}"]),
2501 ("\u{1160}\u{300}", &["\u{1160}\u{300}"]),
2502 ("\u{1160}\u{308}\u{300}", &["\u{1160}\u{308}\u{300}"]),
2503 ("\u{1160}\u{1100}", &["\u{1160}", "\u{1100}"]),
2504 ("\u{1160}\u{308}\u{1100}", &["\u{1160}\u{308}", "\u{1100}"]),
2505 ("\u{1160}\u{1160}", &["\u{1160}\u{1160}"]),
2506 ("\u{1160}\u{308}\u{1160}", &["\u{1160}\u{308}", "\u{1160}"]),
2507 ("\u{1160}\u{11A8}", &["\u{1160}\u{11A8}"]),
2508 ("\u{1160}\u{308}\u{11A8}", &["\u{1160}\u{308}", "\u{11A8}"]),
2509 ("\u{1160}\u{AC00}", &["\u{1160}", "\u{AC00}"]),
2510 ("\u{1160}\u{308}\u{AC00}", &["\u{1160}\u{308}", "\u{AC00}"]),
2511 ("\u{1160}\u{AC01}", &["\u{1160}", "\u{AC01}"]),
2512 ("\u{1160}\u{308}\u{AC01}", &["\u{1160}\u{308}", "\u{AC01}"]),
2513 ("\u{1160}\u{1F1E6}", &["\u{1160}", "\u{1F1E6}"]),
2514 ("\u{1160}\u{308}\u{1F1E6}", &["\u{1160}\u{308}", "\u{1F1E6}"]),
2515 ("\u{1160}\u{378}", &["\u{1160}", "\u{378}"]),
2516 ("\u{1160}\u{308}\u{378}", &["\u{1160}\u{308}", "\u{378}"]),
2517 ("\u{11A8}\u{20}", &["\u{11A8}", "\u{20}"]),
2518 ("\u{11A8}\u{308}\u{20}", &["\u{11A8}\u{308}", "\u{20}"]),
2519 ("\u{11A8}\u{D}", &["\u{11A8}", "\u{D}"]),
2520 ("\u{11A8}\u{308}\u{D}", &["\u{11A8}\u{308}", "\u{D}"]),
2521 ("\u{11A8}\u{A}", &["\u{11A8}", "\u{A}"]),
2522 ("\u{11A8}\u{308}\u{A}", &["\u{11A8}\u{308}", "\u{A}"]),
2523 ("\u{11A8}\u{1}", &["\u{11A8}", "\u{1}"]),
2524 ("\u{11A8}\u{308}\u{1}", &["\u{11A8}\u{308}", "\u{1}"]),
2525 ("\u{11A8}\u{300}", &["\u{11A8}\u{300}"]),
2526 ("\u{11A8}\u{308}\u{300}", &["\u{11A8}\u{308}\u{300}"]),
2527 ("\u{11A8}\u{1100}", &["\u{11A8}", "\u{1100}"]),
2528 ("\u{11A8}\u{308}\u{1100}", &["\u{11A8}\u{308}", "\u{1100}"]),
2529 ("\u{11A8}\u{1160}", &["\u{11A8}", "\u{1160}"]),
2530 ("\u{11A8}\u{308}\u{1160}", &["\u{11A8}\u{308}", "\u{1160}"]),
2531 ("\u{11A8}\u{11A8}", &["\u{11A8}\u{11A8}"]),
2532 ("\u{11A8}\u{308}\u{11A8}", &["\u{11A8}\u{308}", "\u{11A8}"]),
2533 ("\u{11A8}\u{AC00}", &["\u{11A8}", "\u{AC00}"]),
2534 ("\u{11A8}\u{308}\u{AC00}", &["\u{11A8}\u{308}", "\u{AC00}"]),
2535 ("\u{11A8}\u{AC01}", &["\u{11A8}", "\u{AC01}"]),
2536 ("\u{11A8}\u{308}\u{AC01}", &["\u{11A8}\u{308}", "\u{AC01}"]),
2537 ("\u{11A8}\u{1F1E6}", &["\u{11A8}", "\u{1F1E6}"]),
2538 ("\u{11A8}\u{308}\u{1F1E6}", &["\u{11A8}\u{308}", "\u{1F1E6}"]),
2539 ("\u{11A8}\u{378}", &["\u{11A8}", "\u{378}"]),
2540 ("\u{11A8}\u{308}\u{378}", &["\u{11A8}\u{308}", "\u{378}"]),
2541 ("\u{AC00}\u{20}", &["\u{AC00}", "\u{20}"]),
2542 ("\u{AC00}\u{308}\u{20}", &["\u{AC00}\u{308}", "\u{20}"]),
2543 ("\u{AC00}\u{D}", &["\u{AC00}", "\u{D}"]),
2544 ("\u{AC00}\u{308}\u{D}", &["\u{AC00}\u{308}", "\u{D}"]),
2545 ("\u{AC00}\u{A}", &["\u{AC00}", "\u{A}"]),
2546 ("\u{AC00}\u{308}\u{A}", &["\u{AC00}\u{308}", "\u{A}"]),
2547 ("\u{AC00}\u{1}", &["\u{AC00}", "\u{1}"]),
2548 ("\u{AC00}\u{308}\u{1}", &["\u{AC00}\u{308}", "\u{1}"]),
2549 ("\u{AC00}\u{300}", &["\u{AC00}\u{300}"]),
2550 ("\u{AC00}\u{308}\u{300}", &["\u{AC00}\u{308}\u{300}"]),
2551 ("\u{AC00}\u{1100}", &["\u{AC00}", "\u{1100}"]),
2552 ("\u{AC00}\u{308}\u{1100}", &["\u{AC00}\u{308}", "\u{1100}"]),
2553 ("\u{AC00}\u{1160}", &["\u{AC00}\u{1160}"]),
2554 ("\u{AC00}\u{308}\u{1160}", &["\u{AC00}\u{308}", "\u{1160}"]),
2555 ("\u{AC00}\u{11A8}", &["\u{AC00}\u{11A8}"]),
2556 ("\u{AC00}\u{308}\u{11A8}", &["\u{AC00}\u{308}", "\u{11A8}"]),
2557 ("\u{AC00}\u{AC00}", &["\u{AC00}", "\u{AC00}"]),
2558 ("\u{AC00}\u{308}\u{AC00}", &["\u{AC00}\u{308}", "\u{AC00}"]),
2559 ("\u{AC00}\u{AC01}", &["\u{AC00}", "\u{AC01}"]),
2560 ("\u{AC00}\u{308}\u{AC01}", &["\u{AC00}\u{308}", "\u{AC01}"]),
2561 ("\u{AC00}\u{1F1E6}", &["\u{AC00}", "\u{1F1E6}"]),
2562 ("\u{AC00}\u{308}\u{1F1E6}", &["\u{AC00}\u{308}", "\u{1F1E6}"]),
2563 ("\u{AC00}\u{378}", &["\u{AC00}", "\u{378}"]),
2564 ("\u{AC00}\u{308}\u{378}", &["\u{AC00}\u{308}", "\u{378}"]),
2565 ("\u{AC01}\u{20}", &["\u{AC01}", "\u{20}"]),
2566 ("\u{AC01}\u{308}\u{20}", &["\u{AC01}\u{308}", "\u{20}"]),
2567 ("\u{AC01}\u{D}", &["\u{AC01}", "\u{D}"]),
2568 ("\u{AC01}\u{308}\u{D}", &["\u{AC01}\u{308}", "\u{D}"]),
2569 ("\u{AC01}\u{A}", &["\u{AC01}", "\u{A}"]),
2570 ("\u{AC01}\u{308}\u{A}", &["\u{AC01}\u{308}", "\u{A}"]),
2571 ("\u{AC01}\u{1}", &["\u{AC01}", "\u{1}"]),
2572 ("\u{AC01}\u{308}\u{1}", &["\u{AC01}\u{308}", "\u{1}"]),
2573 ("\u{AC01}\u{300}", &["\u{AC01}\u{300}"]),
2574 ("\u{AC01}\u{308}\u{300}", &["\u{AC01}\u{308}\u{300}"]),
2575 ("\u{AC01}\u{1100}", &["\u{AC01}", "\u{1100}"]),
2576 ("\u{AC01}\u{308}\u{1100}", &["\u{AC01}\u{308}", "\u{1100}"]),
2577 ("\u{AC01}\u{1160}", &["\u{AC01}", "\u{1160}"]),
2578 ("\u{AC01}\u{308}\u{1160}", &["\u{AC01}\u{308}", "\u{1160}"]),
2579 ("\u{AC01}\u{11A8}", &["\u{AC01}\u{11A8}"]),
2580 ("\u{AC01}\u{308}\u{11A8}", &["\u{AC01}\u{308}", "\u{11A8}"]),
2581 ("\u{AC01}\u{AC00}", &["\u{AC01}", "\u{AC00}"]),
2582 ("\u{AC01}\u{308}\u{AC00}", &["\u{AC01}\u{308}", "\u{AC00}"]),
2583 ("\u{AC01}\u{AC01}", &["\u{AC01}", "\u{AC01}"]),
2584 ("\u{AC01}\u{308}\u{AC01}", &["\u{AC01}\u{308}", "\u{AC01}"]),
2585 ("\u{AC01}\u{1F1E6}", &["\u{AC01}", "\u{1F1E6}"]),
2586 ("\u{AC01}\u{308}\u{1F1E6}", &["\u{AC01}\u{308}", "\u{1F1E6}"]),
2587 ("\u{AC01}\u{378}", &["\u{AC01}", "\u{378}"]),
2588 ("\u{AC01}\u{308}\u{378}", &["\u{AC01}\u{308}", "\u{378}"]),
2589 ("\u{1F1E6}\u{20}", &["\u{1F1E6}", "\u{20}"]),
2590 ("\u{1F1E6}\u{308}\u{20}", &["\u{1F1E6}\u{308}", "\u{20}"]),
2591 ("\u{1F1E6}\u{D}", &["\u{1F1E6}", "\u{D}"]),
2592 ("\u{1F1E6}\u{308}\u{D}", &["\u{1F1E6}\u{308}", "\u{D}"]),
2593 ("\u{1F1E6}\u{A}", &["\u{1F1E6}", "\u{A}"]),
2594 ("\u{1F1E6}\u{308}\u{A}", &["\u{1F1E6}\u{308}", "\u{A}"]),
2595 ("\u{1F1E6}\u{1}", &["\u{1F1E6}", "\u{1}"]),
2596 ("\u{1F1E6}\u{308}\u{1}", &["\u{1F1E6}\u{308}", "\u{1}"]),
2597 ("\u{1F1E6}\u{300}", &["\u{1F1E6}\u{300}"]),
2598 ("\u{1F1E6}\u{308}\u{300}", &["\u{1F1E6}\u{308}\u{300}"]),
2599 ("\u{1F1E6}\u{1100}", &["\u{1F1E6}", "\u{1100}"]),
2600 ("\u{1F1E6}\u{308}\u{1100}", &["\u{1F1E6}\u{308}", "\u{1100}"]),
2601 ("\u{1F1E6}\u{1160}", &["\u{1F1E6}", "\u{1160}"]),
2602 ("\u{1F1E6}\u{308}\u{1160}", &["\u{1F1E6}\u{308}", "\u{1160}"]),
2603 ("\u{1F1E6}\u{11A8}", &["\u{1F1E6}", "\u{11A8}"]),
2604 ("\u{1F1E6}\u{308}\u{11A8}", &["\u{1F1E6}\u{308}", "\u{11A8}"]),
2605 ("\u{1F1E6}\u{AC00}", &["\u{1F1E6}", "\u{AC00}"]),
2606 ("\u{1F1E6}\u{308}\u{AC00}", &["\u{1F1E6}\u{308}", "\u{AC00}"]),
2607 ("\u{1F1E6}\u{AC01}", &["\u{1F1E6}", "\u{AC01}"]),
2608 ("\u{1F1E6}\u{308}\u{AC01}", &["\u{1F1E6}\u{308}", "\u{AC01}"]),
2609 ("\u{1F1E6}\u{1F1E6}", &["\u{1F1E6}\u{1F1E6}"]),
2610 ("\u{1F1E6}\u{308}\u{1F1E6}", &["\u{1F1E6}\u{308}", "\u{1F1E6}"]),
2611 ("\u{1F1E6}\u{378}", &["\u{1F1E6}", "\u{378}"]),
2612 ("\u{1F1E6}\u{308}\u{378}", &["\u{1F1E6}\u{308}", "\u{378}"]),
2613 ("\u{378}\u{20}", &["\u{378}", "\u{20}"]),
2614 ("\u{378}\u{308}\u{20}", &["\u{378}\u{308}", "\u{20}"]),
2615 ("\u{378}\u{D}", &["\u{378}", "\u{D}"]),
2616 ("\u{378}\u{308}\u{D}", &["\u{378}\u{308}", "\u{D}"]),
2617 ("\u{378}\u{A}", &["\u{378}", "\u{A}"]),
2618 ("\u{378}\u{308}\u{A}", &["\u{378}\u{308}", "\u{A}"]),
2619 ("\u{378}\u{1}", &["\u{378}", "\u{1}"]),
2620 ("\u{378}\u{308}\u{1}", &["\u{378}\u{308}", "\u{1}"]),
2621 ("\u{378}\u{300}", &["\u{378}\u{300}"]),
2622 ("\u{378}\u{308}\u{300}", &["\u{378}\u{308}\u{300}"]),
2623 ("\u{378}\u{1100}", &["\u{378}", "\u{1100}"]),
2624 ("\u{378}\u{308}\u{1100}", &["\u{378}\u{308}", "\u{1100}"]),
2625 ("\u{378}\u{1160}", &["\u{378}", "\u{1160}"]),
2626 ("\u{378}\u{308}\u{1160}", &["\u{378}\u{308}", "\u{1160}"]),
2627 ("\u{378}\u{11A8}", &["\u{378}", "\u{11A8}"]),
2628 ("\u{378}\u{308}\u{11A8}", &["\u{378}\u{308}", "\u{11A8}"]),
2629 ("\u{378}\u{AC00}", &["\u{378}", "\u{AC00}"]),
2630 ("\u{378}\u{308}\u{AC00}", &["\u{378}\u{308}", "\u{AC00}"]),
2631 ("\u{378}\u{AC01}", &["\u{378}", "\u{AC01}"]),
2632 ("\u{378}\u{308}\u{AC01}", &["\u{378}\u{308}", "\u{AC01}"]),
2633 ("\u{378}\u{1F1E6}", &["\u{378}", "\u{1F1E6}"]),
2634 ("\u{378}\u{308}\u{1F1E6}", &["\u{378}\u{308}", "\u{1F1E6}"]),
2635 ("\u{378}\u{378}", &["\u{378}", "\u{378}"]),
2636 ("\u{378}\u{308}\u{378}", &["\u{378}\u{308}", "\u{378}"]),
2637 ("\u{61}\u{1F1E6}\u{62}", &["\u{61}", "\u{1F1E6}", "\u{62}"]),
2638 ("\u{1F1F7}\u{1F1FA}", &["\u{1F1F7}\u{1F1FA}"]),
2639 ("\u{1F1F7}\u{1F1FA}\u{1F1F8}", &["\u{1F1F7}\u{1F1FA}\u{1F1F8}"]),
2640 ("\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}",
2641 &["\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}"]),
2642 ("\u{1F1F7}\u{1F1FA}\u{200B}\u{1F1F8}\u{1F1EA}",
2643 &["\u{1F1F7}\u{1F1FA}", "\u{200B}", "\u{1F1F8}\u{1F1EA}"]),
2644 ("\u{1F1E6}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{1F1E7}\u{1F1E8}"]),
2645 ("\u{1F1E6}\u{200D}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{200D}",
2646 "\u{1F1E7}\u{1F1E8}"]),
2647 ("\u{1F1E6}\u{1F1E7}\u{200D}\u{1F1E8}",
2648 &["\u{1F1E6}\u{1F1E7}\u{200D}", "\u{1F1E8}"]),
2649 ("\u{20}\u{200D}\u{646}", &["\u{20}\u{200D}", "\u{646}"]),
2650 ("\u{646}\u{200D}\u{20}", &["\u{646}\u{200D}", "\u{20}"]),
// Cases where extended (`graphemes(true)`) and legacy (`graphemes(false)`)
// cluster boundaries disagree. Tuple layout: (input, expected extended
// clusters, expected legacy clusters). Every case here ends in U+0903:
// extended segmentation keeps it attached to the preceding cluster, while
// legacy segmentation breaks before it (see the data below).
2653         let test_diff: [(_, &[_], &[_]); 23] = [
2654             ("\u{20}\u{903}", &["\u{20}\u{903}"], &["\u{20}", "\u{903}"]), ("\u{20}\u{308}\u{903}",
2655             &["\u{20}\u{308}\u{903}"], &["\u{20}\u{308}", "\u{903}"]), ("\u{D}\u{308}\u{903}",
2656             &["\u{D}", "\u{308}\u{903}"], &["\u{D}", "\u{308}", "\u{903}"]), ("\u{A}\u{308}\u{903}",
2657             &["\u{A}", "\u{308}\u{903}"], &["\u{A}", "\u{308}", "\u{903}"]), ("\u{1}\u{308}\u{903}",
2658             &["\u{1}", "\u{308}\u{903}"], &["\u{1}", "\u{308}", "\u{903}"]), ("\u{300}\u{903}",
2659             &["\u{300}\u{903}"], &["\u{300}", "\u{903}"]), ("\u{300}\u{308}\u{903}",
2660             &["\u{300}\u{308}\u{903}"], &["\u{300}\u{308}", "\u{903}"]), ("\u{903}\u{903}",
2661             &["\u{903}\u{903}"], &["\u{903}", "\u{903}"]), ("\u{903}\u{308}\u{903}",
2662             &["\u{903}\u{308}\u{903}"], &["\u{903}\u{308}", "\u{903}"]), ("\u{1100}\u{903}",
2663             &["\u{1100}\u{903}"], &["\u{1100}", "\u{903}"]), ("\u{1100}\u{308}\u{903}",
2664             &["\u{1100}\u{308}\u{903}"], &["\u{1100}\u{308}", "\u{903}"]), ("\u{1160}\u{903}",
2665             &["\u{1160}\u{903}"], &["\u{1160}", "\u{903}"]), ("\u{1160}\u{308}\u{903}",
2666             &["\u{1160}\u{308}\u{903}"], &["\u{1160}\u{308}", "\u{903}"]), ("\u{11A8}\u{903}",
2667             &["\u{11A8}\u{903}"], &["\u{11A8}", "\u{903}"]), ("\u{11A8}\u{308}\u{903}",
2668             &["\u{11A8}\u{308}\u{903}"], &["\u{11A8}\u{308}", "\u{903}"]), ("\u{AC00}\u{903}",
2669             &["\u{AC00}\u{903}"], &["\u{AC00}", "\u{903}"]), ("\u{AC00}\u{308}\u{903}",
2670             &["\u{AC00}\u{308}\u{903}"], &["\u{AC00}\u{308}", "\u{903}"]), ("\u{AC01}\u{903}",
2671             &["\u{AC01}\u{903}"], &["\u{AC01}", "\u{903}"]), ("\u{AC01}\u{308}\u{903}",
2672             &["\u{AC01}\u{308}\u{903}"], &["\u{AC01}\u{308}", "\u{903}"]), ("\u{1F1E6}\u{903}",
2673             &["\u{1F1E6}\u{903}"], &["\u{1F1E6}", "\u{903}"]), ("\u{1F1E6}\u{308}\u{903}",
2674             &["\u{1F1E6}\u{308}\u{903}"], &["\u{1F1E6}\u{308}", "\u{903}"]), ("\u{378}\u{903}",
2675             &["\u{378}\u{903}"], &["\u{378}", "\u{903}"]), ("\u{378}\u{308}\u{903}",
2676             &["\u{378}\u{308}\u{903}"], &["\u{378}\u{308}", "\u{903}"]),
// For these inputs extended and legacy clustering agree, so the single
// expected slice `g` must match both `graphemes(true)` and `graphemes(false)`,
// in both iteration directions.
2679         for &(s, g) in test_same.iter() {
2680             // test forward iterator
2681             assert!(order::equals(s.graphemes(true), g.iter().map(|&x| x)));
2682             assert!(order::equals(s.graphemes(false), g.iter().map(|&x| x)));
2684             // test reverse iterator
2685             assert!(order::equals(s.graphemes(true).rev(), g.iter().rev().map(|&x| x)));
2686             assert!(order::equals(s.graphemes(false).rev(), g.iter().rev().map(|&x| x)));
// For the disagreeing inputs, `gt` is the expectation for extended clusters
// (`graphemes(true)`) and `gf` for legacy clusters (`graphemes(false)`);
// each is checked forward and in reverse.
2689         for &(s, gt, gf) in test_diff.iter() {
2690             // test forward iterator
2691             assert!(order::equals(s.graphemes(true), gt.iter().map(|&x| x)));
2692             assert!(order::equals(s.graphemes(false), gf.iter().map(|&x| x)));
2694             // test reverse iterator
2695             assert!(order::equals(s.graphemes(true).rev(), gt.iter().rev().map(|&x| x)));
2696             assert!(order::equals(s.graphemes(false).rev(), gf.iter().rev().map(|&x| x)));
2699         // test the indices iterators
2700         let s = "a̐éö̲\r\n";
2701         let gr_inds = s.grapheme_indices(true).collect::<Vec<(uint, &str)>>();
// Expected byte offsets: the clusters are 3, 3, 5 and 2 bytes long (0, 3, 6, 11).
2702         let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
2703         assert_eq!(gr_inds, b);
// Reversed iteration must yield the same (offset, cluster) pairs in reverse order.
2704         let gr_inds = s.grapheme_indices(true).rev().collect::<Vec<(uint, &str)>>();
2705         let b: &[_] = &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0u, "a̐")];
2706         assert_eq!(gr_inds, b);
2707         let mut gr_inds_iter = s.grapheme_indices(true);
// `by_ref` lets the iterator be consumed below while `gr_inds_iter` stays usable.
2709         let gr_inds = gr_inds_iter.by_ref();
2710         let e1 = gr_inds.size_hint();
// Upper bound is the remaining byte length (13 for this string); lower bound of 1
// presumably means "at least one cluster remains while non-empty" — confirm
// against the GraphemeIndices implementation.
2711         assert_eq!(e1, (1, Some(13)));
2712         let c = gr_inds.count();
// After `count()` exhausts the iterator, the hint must collapse to (0, Some(0)).
2715         let e2 = gr_inds_iter.size_hint();
2716         assert_eq!(e2, (0, Some(0)));
2718         // make sure the reverse iterator does the right thing with "\n" at beginning of string
2720         let gr = s.graphemes(true).rev().collect::<Vec<&str>>();
// NOTE(review): `s` is rebound on an intervening line not shown here; the
// expectation implies a string whose reversed clusters are "\r", "\r\n", "\n".
2721         let b: &[_] = &["\r", "\r\n", "\n"];
// Exercises `split_str` (splitting on a multi-byte substring separator),
// covering: separator absent, leading/trailing/double-ended separators,
// multi-byte UTF-8 separators, and overlapping-candidate inputs ("zzz"/"zzzzz").
2726     fn test_split_strator() {
// Helper: collect the split of `s` on `sep`; presumably compared against `u`
// on the line elided here — confirm against the full source.
2727         fn t(s: &str, sep: &str, u: &[&str]) {
2728             let v: Vec<&str> = s.split_str(sep).collect();
2731         t("--1233345--", "12345", &["--1233345--"]);
2732         t("abc::hello::there", "::", &["abc", "hello", "there"]);
2733         t("::hello::there", "::", &["", "hello", "there"]);
2734         t("hello::there::", "::", &["hello", "there", ""]);
2735         t("::hello::there::", "::", &["", "hello", "there", ""]);
2736         t("ประเทศไทย中华Việt Nam", "中华", &["ประเทศไทย", "Việt Nam"]);
2737         t("zzXXXzzYYYzz", "zz", &["", "XXX", "YYY", ""]);
2738         t("zzXXXzYYYz", "XXX", &["zz", "zYYYz"]);
2739         t(".XXX.YYY.", ".", &["", "XXX", "YYY", ""]);
// Edge cases: input equal to the separator, no match at all, and inputs where
// separator occurrences cannot overlap ("zzz" splits as "" + "z").
2741         t("zz", "zz", &["",""]);
2742         t("ok", "z", &["ok"]);
2743         t("zzz", "zz", &["","z"]);
2744         t("zzzzz", "zz", &["","","z"]);
// A `Default`-constructed string type must be the empty string; the generic
// helper `t` lets the (elided) call sites check this for each `Str` impl.
2748     fn test_str_default() {
2749         use core::default::Default;
2750         fn t<S: Default + Str>() {
2751             let s: S = Default::default();
2752             assert_eq!(s.as_slice(), "");
// `len()` on string slices must agree whether the slices come from literals,
// from freshly-built `String`s, or from a shared `String` — summed lengths of
// any partition of "01234" are always 5.
2760     fn test_str_container() {
// Sum of byte lengths over a slice of string slices.
2761         fn sum_len(v: &[&str]) -> uint {
2762             v.iter().map(|x| x.len()).sum()
2765         let s = String::from_str("01234");
2766         assert_eq!(5, sum_len(&["012", "", "34"]));
2767         assert_eq!(5, sum_len(&[String::from_str("01").as_slice(),
2768                                 String::from_str("2").as_slice(),
2769                                 String::from_str("34").as_slice(),
2770                                 String::from_str("").as_slice()]));
2771         assert_eq!(5, sum_len(&[s.as_slice()]));
// `from_utf8` round-trips valid ASCII and multi-byte UTF-8 byte slices, and
// rejects invalid bytes with a `Utf8Error`.
2775     fn test_str_from_utf8() {
2777         assert_eq!(from_utf8(xs), Ok("hello"));
2779         let xs = "ศไทย中华Việt Nam".as_bytes();
2780         assert_eq!(from_utf8(xs), Ok("ศไทย中华Việt Nam"));
// 0xFF can never appear in well-formed UTF-8; here the error variant reported
// is `TooShort` — presumably because 0xFF at end-of-input is treated as a
// truncated sequence; confirm against the `from_utf8` implementation.
2782         let xs = b"hello\xFF";
2783         assert_eq!(from_utf8(xs), Err(Utf8Error::TooShort));
2790 use prelude::{SliceExt, IteratorExt, SliceConcatExt};
2792 use test::black_box;
// Bench: count chars of a mixed Thai/Chinese/Vietnamese + ASCII string via
// the forward `chars()` iterator.
2795     fn char_iterator(b: &mut Bencher) {
2796         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2798         b.iter(|| s.chars().count());
// Bench: same iteration as `char_iterator`, but via a `for` loop with
// `black_box` so the loop body is not optimized away.
2802     fn char_iterator_for(b: &mut Bencher) {
2803         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2806             for ch in s.chars() { black_box(ch); }
// Bench: char counting on pure-ASCII input (single-byte fast path baseline
// against the multi-byte string used in `char_iterator`).
2811     fn char_iterator_ascii(b: &mut Bencher) {
2812         let s = "Mary had a little lamb, Little lamb
2813         Mary had a little lamb, Little lamb
2814         Mary had a little lamb, Little lamb
2815         Mary had a little lamb, Little lamb
2816         Mary had a little lamb, Little lamb
2817         Mary had a little lamb, Little lamb";
2819         b.iter(|| s.chars().count());
// Bench: char counting via the reverse `chars()` iterator.
2823     fn char_iterator_rev(b: &mut Bencher) {
2824         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2826         b.iter(|| s.chars().rev().count());
// Bench: reverse char iteration via a `for` loop, `black_box`-ing each char.
2830     fn char_iterator_rev_for(b: &mut Bencher) {
2831         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2834             for ch in s.chars().rev() { black_box(ch); }
// Bench: `char_indices()` forward iteration; the count is asserted against a
// `chars().count()` computed once outside the timed closure.
2839     fn char_indicesator(b: &mut Bencher) {
2840         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2841         let len = s.chars().count();
2843         b.iter(|| assert_eq!(s.char_indices().count(), len));
// Bench: `char_indices()` reverse iteration, checked against the same
// precomputed char count.
2847     fn char_indicesator_rev(b: &mut Bencher) {
2848         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2849         let len = s.chars().count();
2851         b.iter(|| assert_eq!(s.char_indices().rev().count(), len));
// Bench: splitting a mostly-multi-byte string on an ASCII char separator
// ('V' appears twice, so the split yields 3 pieces).
2855     fn split_unicode_ascii(b: &mut Bencher) {
2856         let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2858         b.iter(|| assert_eq!(s.split('V').count(), 3));
// Bench: same split as `split_unicode_ascii`, but through a custom `CharEq`
// whose `only_ascii()` returns false, forcing the non-ASCII (char-by-char)
// search path instead of the byte-scan fast path.
2862     fn split_unicode_not_ascii(b: &mut Bencher) {
// Matches exactly the wrapped char (comparison line elided in this view).
2863         struct NotAscii(char);
2864         impl CharEq for NotAscii {
2865             fn matches(&mut self, c: char) -> bool {
2866                 let NotAscii(cc) = *self;
// Deliberately report "not ASCII-only" so the splitter cannot take the
// ASCII fast path — that is the point of this benchmark.
2869             fn only_ascii(&self) -> bool { false }
2871         let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2873         b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
// Bench: splitting an ASCII string on the space character (fast path),
// checked against a count computed once outside the timed closure.
2878     fn split_ascii(b: &mut Bencher) {
2879         let s = "Mary had a little lamb, Little lamb, little-lamb.";
2880         let len = s.split(' ').count();
2882         b.iter(|| assert_eq!(s.split(' ').count(), len));
// Bench: ASCII input, but split through a `CharEq` that claims it is not
// ASCII-only — measures the cost of losing the byte-scan fast path.
2886     fn split_not_ascii(b: &mut Bencher) {
// Matches exactly the wrapped char (comparison line elided in this view).
2887         struct NotAscii(char);
2888         impl CharEq for NotAscii {
2890             fn matches(&mut self, c: char) -> bool {
2891                 let NotAscii(cc) = *self;
// Deliberately disable the ASCII fast path.
2894             fn only_ascii(&self) -> bool { false }
2896         let s = "Mary had a little lamb, Little lamb, little-lamb.";
2897         let len = s.split(' ').count();
2899         b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
// Bench: splitting via a named fn predicate (fn-pointer `CharEq` impl),
// to compare against the char and closure variants.
2903     fn split_extern_fn(b: &mut Bencher) {
2904         let s = "Mary had a little lamb, Little lamb, little-lamb.";
2905         let len = s.split(' ').count();
2906         fn pred(c: char) -> bool { c == ' ' }
2908         b.iter(|| assert_eq!(s.split(pred).count(), len));
// Bench: splitting via an inline closure predicate.
2912     fn split_closure(b: &mut Bencher) {
2913         let s = "Mary had a little lamb, Little lamb, little-lamb.";
2914         let len = s.split(' ').count();
2916         b.iter(|| assert_eq!(s.split(|&: c: char| c == ' ').count(), len));
// Bench: splitting via a `&[char]` separator slice (matches any char in the
// slice) — here a single-element slice containing ' '.
2920     fn split_slice(b: &mut Bencher) {
2921         let s = "Mary had a little lamb, Little lamb, little-lamb.";
2922         let len = s.split(' ').count();
2924         let c: &[char] = &[' '];
2925         b.iter(|| assert_eq!(s.split(c).count(), len));
// Bench: `connect` (join) of 10 copies of `s`; result length must be
// 10 * s.len() plus 9 separators.
2929     fn bench_connect(b: &mut Bencher) {
2930         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2932         let v = vec![s, s, s, s, s, s, s, s, s, s];
2934             assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
// Bench: substring search — short needle in a short haystack, needle present
// (the needle binding is on a line elided in this view).
2939     fn bench_contains_short_short(b: &mut Bencher) {
2940         let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2944             assert!(haystack.contains(needle));
// Bench: substring search — short needle absent from a long (multi-paragraph
// lorem-ipsum) haystack; measures the unsuccessful-search path. The haystack
// is one multi-line string literal, so no comments may appear inside it.
2949     fn bench_contains_short_long(b: &mut Bencher) {
2951 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
2952 ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
2953 eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
2954 sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
2955 tempus vel, gravida nec quam.
2957 In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
2958 sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
2959 diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
2960 lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
2961 eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
2962 interdum. Curabitur ut nisi justo.
2964 Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
2965 mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
2966 lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
2967 est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
2968 felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
2969 ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
2970 feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
2971 Aliquam sit amet placerat lorem.
2973 Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
2974 mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
2975 Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
2976 lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
2977 suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
2980 Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
2981 feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
2982 vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
2983 leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
2984 malesuada sollicitudin quam eu fermentum.";
// "english" never occurs in the Latin text above, so `contains` must scan to
// the end and fail every iteration.
2985         let needle = "english";
2988             assert!(!haystack.contains(needle));
// Bench: worst case for a naive O(n*m) substring search — long run of 'a'
// with a needle that mismatches only on its final byte.
2993     fn bench_contains_bad_naive(b: &mut Bencher) {
2994         let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
2995         let needle = "aaaaaaaab";
2998             assert!(!haystack.contains(needle));
// Bench: substring search where needle and haystack are identical — the
// match succeeds only at offset 0 and consumes the whole haystack.
3003     fn bench_contains_equal(b: &mut Bencher) {
3004         let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3005         let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3008             assert!(haystack.contains(needle));