From e772818294fb19622b403358db27dc6e0f11f728 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Mon, 8 Jun 2015 22:18:13 +0200 Subject: [PATCH] Reordered the methods on str to improve doc sorting --- src/libcollections/str.rs | 1837 +++++++++++++++++++------------------ 1 file changed, 919 insertions(+), 918 deletions(-) diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs index 8640a56cd09..a9725214c19 100644 --- a/src/libcollections/str.rs +++ b/src/libcollections/str.rs @@ -428,719 +428,637 @@ fn to_owned(&self) -> String { #[cfg(not(test))] #[stable(feature = "rust1", since = "1.0.0")] impl str { - /// Escapes each char in `s` with `char::escape_default`. - #[unstable(feature = "collections", - reason = "return type may change to be an iterator")] - pub fn escape_default(&self) -> String { - self.chars().flat_map(|c| c.escape_default()).collect() - } - - /// Escapes each char in `s` with `char::escape_unicode`. - #[unstable(feature = "collections", - reason = "return type may change to be an iterator")] - pub fn escape_unicode(&self) -> String { - self.chars().flat_map(|c| c.escape_unicode()).collect() - } - - /// Replaces all occurrences of one string with another. - /// - /// `replace` takes two arguments, a sub-`&str` to find in `self`, and a - /// second `&str` to - /// replace it with. If the original `&str` isn't found, no change occurs. + /// Returns the length of `self` in bytes. /// /// # Examples /// /// ``` - /// let s = "this is old"; - /// - /// assert_eq!(s.replace("old", "new"), "this is new"); - /// ``` - /// - /// When a `&str` isn't found: - /// - /// ``` - /// let s = "this is old"; - /// assert_eq!(s.replace("cookie monster", "little lamb"), s); + /// assert_eq!("foo".len(), 3); + /// assert_eq!("ƒoo".len(), 4); // fancy f! 
/// ``` #[stable(feature = "rust1", since = "1.0.0")] - pub fn replace(&self, from: &str, to: &str) -> String { - let mut result = String::new(); - let mut last_end = 0; - for (start, end) in self.match_indices(from) { - result.push_str(unsafe { self.slice_unchecked(last_end, start) }); - result.push_str(to); - last_end = end; - } - result.push_str(unsafe { self.slice_unchecked(last_end, self.len()) }); - result - } - - /// Returns an iterator over the string in Unicode Normalization Form D - /// (canonical decomposition). - #[allow(deprecated)] - #[deprecated(reason = "use the crates.io `unicode-normalization` library instead", - since = "1.0.0")] - #[inline] - #[unstable(feature = "unicode", - reason = "this functionality may be replaced with a more generic \ - unicode crate on crates.io")] - pub fn nfd_chars(&self) -> Decompositions { - Decompositions { - iter: self[..].chars(), - buffer: Vec::new(), - sorted: false, - kind: Canonical - } - } - - /// Returns an iterator over the string in Unicode Normalization Form KD - /// (compatibility decomposition). - #[allow(deprecated)] - #[deprecated(reason = "use the crates.io `unicode-normalization` library instead", - since = "1.0.0")] #[inline] - #[unstable(feature = "unicode", - reason = "this functionality may be replaced with a more generic \ - unicode crate on crates.io")] - pub fn nfkd_chars(&self) -> Decompositions { - Decompositions { - iter: self[..].chars(), - buffer: Vec::new(), - sorted: false, - kind: Compatible - } + pub fn len(&self) -> usize { + core_str::StrExt::len(&self[..]) } - /// An Iterator over the string in Unicode Normalization Form C - /// (canonical decomposition followed by canonical composition). - #[allow(deprecated)] - #[deprecated(reason = "use the crates.io `unicode-normalization` library instead", - since = "1.0.0")] + /// Returns true if this slice has a length of zero bytes. 
+ /// + /// # Examples + /// + /// ``` + /// assert!("".is_empty()); + /// ``` #[inline] - #[unstable(feature = "unicode", - reason = "this functionality may be replaced with a more generic \ - unicode crate on crates.io")] - pub fn nfc_chars(&self) -> Recompositions { - Recompositions { - iter: self.nfd_chars(), - state: Composing, - buffer: VecDeque::new(), - composee: None, - last_ccc: None - } + #[stable(feature = "rust1", since = "1.0.0")] + pub fn is_empty(&self) -> bool { + core_str::StrExt::is_empty(&self[..]) } - /// An Iterator over the string in Unicode Normalization Form KC - /// (compatibility decomposition followed by canonical composition). - #[allow(deprecated)] - #[deprecated(reason = "use the crates.io `unicode-normalization` library instead", - since = "1.0.0")] - #[inline] + /// Returns a string's displayed width in columns. + /// + /// Control characters have zero width. + /// + /// `is_cjk` determines behavior for characters in the Ambiguous category: + /// if `is_cjk` is + /// `true`, these are 2 columns wide; otherwise, they are 1. + /// In CJK locales, `is_cjk` should be + /// `true`, else it should be `false`. + /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/) + /// recommends that these + /// characters be treated as 1 column (i.e., `is_cjk = false`) if the + /// locale is unknown. + #[deprecated(reason = "use the crates.io `unicode-width` library instead", + since = "1.0.0")] #[unstable(feature = "unicode", - reason = "this functionality may be replaced with a more generic \ - unicode crate on crates.io")] - pub fn nfkc_chars(&self) -> Recompositions { - Recompositions { - iter: self.nfkd_chars(), - state: Composing, - buffer: VecDeque::new(), - composee: None, - last_ccc: None - } + reason = "this functionality may only be provided by libunicode")] + pub fn width(&self, is_cjk: bool) -> usize { + UnicodeStr::width(&self[..], is_cjk) } - /// Returns `true` if `self` contains another `&str`. 
+ /// Checks that `index`-th byte lies at the start and/or end of a + /// UTF-8 code point sequence. + /// + /// The start and end of the string (when `index == self.len()`) are + /// considered to be + /// boundaries. + /// + /// # Panics + /// + /// Panics if `index` is greater than `self.len()`. /// /// # Examples /// /// ``` - /// assert!("bananas".contains("nana")); + /// # #![feature(str_char)] + /// let s = "Löwe 老虎 Léopard"; + /// assert!(s.is_char_boundary(0)); + /// // start of `老` + /// assert!(s.is_char_boundary(6)); + /// assert!(s.is_char_boundary(s.len())); /// - /// assert!(!"bananas".contains("foobar")); + /// // second byte of `ö` + /// assert!(!s.is_char_boundary(2)); + /// + /// // third byte of `老` + /// assert!(!s.is_char_boundary(8)); /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - pub fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool { - core_str::StrExt::contains(&self[..], pat) + #[unstable(feature = "str_char", + reason = "it is unclear whether this method pulls its weight \ + with the existence of the char_indices iterator or \ + this method may want to be replaced with checked \ + slicing")] + pub fn is_char_boundary(&self, index: usize) -> bool { + core_str::StrExt::is_char_boundary(&self[..], index) } - /// An iterator over the codepoints of `self`. + /// Converts `self` to a byte slice. /// /// # Examples /// /// ``` - /// let v: Vec<char> = "abc åäö".chars().collect(); - /// - /// assert_eq!(v, ['a', 'b', 'c', ' ', 'å', 'ä', 'ö']); + /// assert_eq!("bors".as_bytes(), b"bors"); /// ``` #[stable(feature = "rust1", since = "1.0.0")] - pub fn chars(&self) -> Chars { - core_str::StrExt::chars(&self[..]) + #[inline(always)] + pub fn as_bytes(&self) -> &[u8] { + core_str::StrExt::as_bytes(&self[..]) } - /// An iterator over the bytes of `self`. + /// Returns an unsafe pointer to the `&str`'s buffer. + /// + /// The caller must ensure that the string outlives this pointer, and + /// that it is not + /// reallocated (e.g. 
by pushing to the string). /// /// # Examples /// /// ``` - /// let v: Vec<u8> = "bors".bytes().collect(); - /// - /// assert_eq!(v, b"bors".to_vec()); + /// let s = "Hello"; + /// let p = s.as_ptr(); /// ``` #[stable(feature = "rust1", since = "1.0.0")] - pub fn bytes(&self) -> Bytes { - core_str::StrExt::bytes(&self[..]) + #[inline] + pub fn as_ptr(&self) -> *const u8 { + core_str::StrExt::as_ptr(&self[..]) } - /// An iterator over the characters of `self` and their byte offsets. + /// Takes a bytewise slice from a string. + /// + /// Returns the substring from [`begin`..`end`). + /// + /// # Unsafety + /// + /// Caller must check both UTF-8 character boundaries and the boundaries + /// of the entire slice as + /// well. /// /// # Examples /// /// ``` - /// let v: Vec<(usize, char)> = "abc".char_indices().collect(); - /// let b = vec![(0, 'a'), (1, 'b'), (2, 'c')]; + /// let s = "Löwe 老虎 Léopard"; /// - /// assert_eq!(v, b); + /// unsafe { + /// assert_eq!(s.slice_unchecked(0, 21), "Löwe 老虎 Léopard"); + /// } /// ``` #[stable(feature = "rust1", since = "1.0.0")] - pub fn char_indices(&self) -> CharIndices { - core_str::StrExt::char_indices(&self[..]) + pub unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str { + core_str::StrExt::slice_unchecked(&self[..], begin, end) } - /// An iterator over substrings of `self`, separated by characters - /// matched by a pattern. + /// Returns a slice of the string from the character range [`begin`..`end`). /// - /// The pattern can be a simple `&str`, `char`, or a closure that - /// determines the split. - /// Additional libraries might provide more complex patterns like - /// regular expressions. + /// That is, start at the `begin`-th code point of the string and continue + /// to the `end`-th code point. This does not detect or handle edge cases + /// such as leaving a combining character as the first code point of the + /// string. 
/// - /// # Iterator behavior + /// Due to the design of UTF-8, this operation is `O(end)`. Use slicing + /// syntax if you want to use byte indices rather than codepoint indices. /// - /// The returned iterator will be double ended if the pattern allows a - /// reverse search and forward/reverse search yields the same elements. - /// This is true for, eg, `char` but not - /// for `&str`. + /// # Panics /// - /// If the pattern allows a reverse search but its results might differ - /// from a forward search, `rsplit()` can be used. + /// Panics if `begin` > `end` or the either `begin` or `end` are beyond the + /// last character of the string. /// /// # Examples /// - /// Simple patterns: - /// /// ``` - /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect(); - /// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]); - /// - /// let v: Vec<&str> = "".split('X').collect(); - /// assert_eq!(v, [""]); - /// - /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect(); - /// assert_eq!(v, ["lion", "", "tiger", "leopard"]); + /// # #![feature(collections)] + /// let s = "Löwe 老虎 Léopard"; /// - /// let v: Vec<&str> = "lion::tiger::leopard".split("::").collect(); - /// assert_eq!(v, ["lion", "tiger", "leopard"]); - /// - /// let v: Vec<&str> = "abc1def2ghi".split(char::is_numeric).collect(); - /// assert_eq!(v, ["abc", "def", "ghi"]); - /// - /// let v: Vec<&str> = "lionXtigerXleopard".split(char::is_uppercase).collect(); - /// assert_eq!(v, ["lion", "tiger", "leopard"]); - /// ``` - /// - /// A more complex pattern, using a closure: - /// - /// ``` - /// let v: Vec<&str> = "abc1defXghi".split(|c| c == '1' || c == 'X').collect(); - /// assert_eq!(v, ["abc", "def", "ghi"]); + /// assert_eq!(s.slice_chars(0, 4), "Löwe"); + /// assert_eq!(s.slice_chars(5, 7), "老虎"); /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - pub fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> { - core_str::StrExt::split(&self[..], pat) + 
#[unstable(feature = "collections", + reason = "may have yet to prove its worth")] + pub fn slice_chars(&self, begin: usize, end: usize) -> &str { + core_str::StrExt::slice_chars(&self[..], begin, end) } - /// An iterator over substrings of `self`, separated by characters - /// matched by a pattern and yielded in reverse order. - /// - /// The pattern can be a simple `&str`, `char`, or a closure that - /// determines the split. - /// Additional libraries might provide more complex patterns like - /// regular expressions. + /// Given a byte position, return the next char and its index. /// - /// # Iterator behavior + /// This can be used to iterate over the Unicode characters of a string. /// - /// The returned iterator requires that the pattern supports a - /// reverse search, - /// and it will be double ended if a forward/reverse search yields - /// the same elements. + /// # Panics /// - /// For iterating from the front, `split()` can be used. + /// If `i` is greater than or equal to the length of the string. + /// If `i` is not the index of the beginning of a valid UTF-8 character. /// /// # Examples /// - /// Simple patterns: - /// - /// ```rust - /// let v: Vec<&str> = "Mary had a little lamb".rsplit(' ').collect(); - /// assert_eq!(v, ["lamb", "little", "a", "had", "Mary"]); - /// - /// let v: Vec<&str> = "".rsplit('X').collect(); - /// assert_eq!(v, [""]); + /// This example manually iterates through the characters of a string; + /// this should normally be + /// done by `.chars()` or `.char_indices()`. 
/// - /// let v: Vec<&str> = "lionXXtigerXleopard".rsplit('X').collect(); - /// assert_eq!(v, ["leopard", "tiger", "", "lion"]); + /// ``` + /// # #![feature(str_char, core)] + /// use std::str::CharRange; /// - /// let v: Vec<&str> = "lion::tiger::leopard".rsplit("::").collect(); - /// assert_eq!(v, ["leopard", "tiger", "lion"]); + /// let s = "中华Việt Nam"; + /// let mut i = 0; + /// while i < s.len() { + /// let CharRange {ch, next} = s.char_range_at(i); + /// println!("{}: {}", i, ch); + /// i = next; + /// } /// ``` /// - /// A more complex pattern, using a closure: + /// This outputs: /// + /// ```text + /// 0: 中 + /// 3: 华 + /// 6: V + /// 7: i + /// 8: ệ + /// 11: t + /// 12: + /// 13: N + /// 14: a + /// 15: m /// ``` - /// let v: Vec<&str> = "abc1defXghi".rsplit(|c| c == '1' || c == 'X').collect(); - /// assert_eq!(v, ["ghi", "def", "abc"]); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - pub fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P> - where P::Searcher: ReverseSearcher<'a> - { - core_str::StrExt::rsplit(&self[..], pat) + #[unstable(feature = "str_char", + reason = "often replaced by char_indices, this method may \ + be removed in favor of just char_at() or eventually \ + removed altogether")] + pub fn char_range_at(&self, start: usize) -> CharRange { + core_str::StrExt::char_range_at(&self[..], start) } - /// An iterator over substrings of `self`, separated by characters - /// matched by a pattern. + /// Given a byte position, return the previous `char` and its position. /// - /// The pattern can be a simple `&str`, `char`, or a closure that - /// determines the split. - /// Additional libraries might provide more complex patterns - /// like regular expressions. + /// This function can be used to iterate over a Unicode string in reverse. /// - /// Equivalent to `split`, except that the trailing substring - /// is skipped if empty. + /// Returns 0 for next index if called on start index 0. 
/// - /// This method can be used for string data that is _terminated_, - /// rather than _separated_ by a pattern. + /// # Panics /// - /// # Iterator behavior + /// If `i` is greater than the length of the string. + /// If `i` is not an index following a valid UTF-8 character. /// - /// The returned iterator will be double ended if the pattern allows a - /// reverse search - /// and forward/reverse search yields the same elements. This is true - /// for, eg, `char` but not for `&str`. + /// # Examples /// - /// If the pattern allows a reverse search but its results might differ - /// from a forward search, `rsplit_terminator()` can be used. + /// This example manually iterates through the characters of a string; + /// this should normally be + /// done by `.chars().rev()` or `.char_indices()`. /// - /// # Examples + /// ``` + /// # #![feature(str_char, core)] + /// use std::str::CharRange; /// + /// let s = "中华Việt Nam"; + /// let mut i = s.len(); + /// while i > 0 { + /// let CharRange {ch, next} = s.char_range_at_reverse(i); + /// println!("{}: {}", i, ch); + /// i = next; + /// } /// ``` - /// let v: Vec<&str> = "A.B.".split_terminator('.').collect(); - /// assert_eq!(v, ["A", "B"]); /// - /// let v: Vec<&str> = "A..B..".split_terminator(".").collect(); - /// assert_eq!(v, ["A", "", "B", ""]); + /// This outputs: + /// + /// ```text + /// 16: m + /// 15: a + /// 14: N + /// 13: + /// 12: t + /// 11: ệ + /// 8: i + /// 7: V + /// 6: 华 + /// 3: 中 /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - pub fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> { - core_str::StrExt::split_terminator(&self[..], pat) + #[unstable(feature = "str_char", + reason = "often replaced by char_indices, this method may \ + be removed in favor of just char_at_reverse() or \ + eventually removed altogether")] + pub fn char_range_at_reverse(&self, start: usize) -> CharRange { + core_str::StrExt::char_range_at_reverse(&self[..], start) } - /// An 
iterator over substrings of `self`, separated by characters - /// matched by a pattern and yielded in reverse order. + /// Given a byte position, return the `char` at that position. /// - /// The pattern can be a simple `&str`, `char`, or a closure that - /// determines the split. - /// Additional libraries might provide more complex patterns like - /// regular expressions. + /// # Panics /// - /// Equivalent to `split`, except that the trailing substring is - /// skipped if empty. + /// If `i` is greater than or equal to the length of the string. + /// If `i` is not the index of the beginning of a valid UTF-8 character. /// - /// This method can be used for string data that is _terminated_, - /// rather than _separated_ by a pattern. + /// # Examples /// - /// # Iterator behavior + /// ``` + /// # #![feature(str_char)] + /// let s = "abπc"; + /// assert_eq!(s.char_at(1), 'b'); + /// assert_eq!(s.char_at(2), 'π'); + /// ``` + #[unstable(feature = "str_char", + reason = "frequently replaced by the chars() iterator, this \ + method may be removed or possibly renamed in the \ + future; it is normally replaced by chars/char_indices \ + iterators or by getting the first char from a \ + subslice")] + pub fn char_at(&self, i: usize) -> char { + core_str::StrExt::char_at(&self[..], i) + } + + /// Given a byte position, return the `char` at that position, counting + /// from the end. /// - /// The returned iterator requires that the pattern supports a - /// reverse search, and it will be double ended if a forward/reverse - /// search yields the same elements. + /// # Panics /// - /// For iterating from the front, `split_terminator()` can be used. + /// If `i` is greater than the length of the string. + /// If `i` is not an index following a valid UTF-8 character. 
/// /// # Examples /// /// ``` - /// let v: Vec<&str> = "A.B.".rsplit_terminator('.').collect(); - /// assert_eq!(v, ["B", "A"]); - /// - /// let v: Vec<&str> = "A..B..".rsplit_terminator(".").collect(); - /// assert_eq!(v, ["", "B", "", "A"]); + /// # #![feature(str_char)] + /// let s = "abπc"; + /// assert_eq!(s.char_at_reverse(1), 'a'); + /// assert_eq!(s.char_at_reverse(2), 'b'); /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - pub fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P> - where P::Searcher: ReverseSearcher<'a> - { - core_str::StrExt::rsplit_terminator(&self[..], pat) + #[unstable(feature = "str_char", + reason = "see char_at for more details, but reverse semantics \ + are also somewhat unclear, especially with which \ + cases generate panics")] + pub fn char_at_reverse(&self, i: usize) -> char { + core_str::StrExt::char_at_reverse(&self[..], i) } - /// An iterator over substrings of `self`, separated by a pattern, - /// restricted to returning - /// at most `count` items. - /// - /// The last element returned, if any, will contain the remainder of the - /// string. - /// The pattern can be a simple `&str`, `char`, or a closure that - /// determines the split. - /// Additional libraries might provide more complex patterns like - /// regular expressions. - /// - /// # Iterator behavior + /// Retrieves the first character from a `&str` and returns it. /// - /// The returned iterator will not be double ended, because it is - /// not efficient to support. + /// This does not allocate a new string; instead, it returns a slice that + /// points one character + /// beyond the character that was shifted. /// - /// If the pattern allows a reverse search, `rsplitn()` can be used. + /// If the slice does not contain any characters, None is returned instead. 
/// /// # Examples /// - /// Simple patterns: - /// /// ``` - /// let v: Vec<&str> = "Mary had a little lambda".splitn(3, ' ').collect(); - /// assert_eq!(v, ["Mary", "had", "a little lambda"]); + /// # #![feature(str_char)] + /// let s = "Löwe 老虎 Léopard"; + /// let (c, s1) = s.slice_shift_char().unwrap(); /// - /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(3, "X").collect(); - /// assert_eq!(v, ["lion", "", "tigerXleopard"]); + /// assert_eq!(c, 'L'); + /// assert_eq!(s1, "öwe 老虎 Léopard"); /// - /// let v: Vec<&str> = "abcXdef".splitn(1, 'X').collect(); - /// assert_eq!(v, ["abcXdef"]); + /// let (c, s2) = s1.slice_shift_char().unwrap(); /// - /// let v: Vec<&str> = "".splitn(1, 'X').collect(); - /// assert_eq!(v, [""]); - /// ``` + /// assert_eq!(c, 'ö'); + /// assert_eq!(s2, "we 老虎 Léopard"); + /// ``` + #[unstable(feature = "str_char", + reason = "awaiting conventions about shifting and slices and \ + may not be warranted with the existence of the chars \ + and/or char_indices iterators")] + pub fn slice_shift_char(&self) -> Option<(char, &str)> { + core_str::StrExt::slice_shift_char(&self[..]) + } + + /// An iterator over the codepoints of `self`. /// - /// A more complex pattern, using a closure: + /// # Examples /// /// ``` - /// let v: Vec<&str> = "abc1defXghi".splitn(2, |c| c == '1' || c == 'X').collect(); - /// assert_eq!(v, ["abc", "defXghi"]); + /// let v: Vec<char> = "abc åäö".chars().collect(); + /// + /// assert_eq!(v, ['a', 'b', 'c', ' ', 'å', 'ä', 'ö']); /// ``` #[stable(feature = "rust1", since = "1.0.0")] - pub fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> { - core_str::StrExt::splitn(&self[..], count, pat) + pub fn chars(&self) -> Chars { + core_str::StrExt::chars(&self[..]) } - /// An iterator over substrings of `self`, separated by a pattern, - /// starting from the end of the string, restricted to returning - /// at most `count` items. 
- /// - /// The last element returned, if any, will contain the remainder of the - /// string. - /// - /// The pattern can be a simple `&str`, `char`, or a closure that - /// determines the split. - /// Additional libraries might provide more complex patterns like - /// regular expressions. - /// - /// # Iterator behavior - /// - /// The returned iterator will not be double ended, because it is not - /// efficient to support. - /// - /// `splitn()` can be used for splitting from the front. + /// An iterator over the characters of `self` and their byte offsets. /// /// # Examples /// - /// Simple patterns: - /// /// ``` - /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(3, ' ').collect(); - /// assert_eq!(v, ["lamb", "little", "Mary had a"]); - /// - /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(3, 'X').collect(); - /// assert_eq!(v, ["leopard", "tiger", "lionX"]); + /// let v: Vec<(usize, char)> = "abc".char_indices().collect(); + /// let b = vec![(0, 'a'), (1, 'b'), (2, 'c')]; /// - /// let v: Vec<&str> = "lion::tiger::leopard".rsplitn(2, "::").collect(); - /// assert_eq!(v, ["leopard", "lion::tiger"]); + /// assert_eq!(v, b); /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn char_indices(&self) -> CharIndices { + core_str::StrExt::char_indices(&self[..]) + } + + /// An iterator over the bytes of `self`. 
/// - /// A more complex pattern, using a closure: + /// # Examples /// /// ``` - /// let v: Vec<&str> = "abc1defXghi".rsplitn(2, |c| c == '1' || c == 'X').collect(); - /// assert_eq!(v, ["ghi", "abc1def"]); + /// let v: Vec<u8> = "bors".bytes().collect(); + /// + /// assert_eq!(v, b"bors".to_vec()); /// ``` #[stable(feature = "rust1", since = "1.0.0")] - pub fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P> - where P::Searcher: ReverseSearcher<'a> - { - core_str::StrExt::rsplitn(&self[..], count, pat) + pub fn bytes(&self) -> Bytes { + core_str::StrExt::bytes(&self[..]) } - /// An iterator over the matches of a pattern within `self`. - /// - /// The pattern can be a simple `&str`, `char`, or a closure that - /// determines the split. - /// Additional libraries might provide more complex patterns like - /// regular expressions. - /// - /// # Iterator behavior - /// - /// The returned iterator will be double ended if the pattern allows - /// a reverse search - /// and forward/reverse search yields the same elements. This is true - /// for, eg, `char` but not - /// for `&str`. - /// - /// If the pattern allows a reverse search but its results might differ - /// from a forward search, `rmatches()` can be used. + /// An iterator over the non-empty substrings of `self` which contain no whitespace, + /// and which are separated by any amount of whitespace. 
/// /// # Examples /// /// ``` - /// # #![feature(collections)] - /// let v: Vec<&str> = "abcXXXabcYYYabc".matches("abc").collect(); - /// assert_eq!(v, ["abc", "abc", "abc"]); + /// let some_words = " Mary had\ta little \n\t lamb"; + /// let v: Vec<&str> = some_words.split_whitespace().collect(); /// - /// let v: Vec<&str> = "1abc2abc3".matches(char::is_numeric).collect(); - /// assert_eq!(v, ["1", "2", "3"]); + /// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]); /// ``` - #[unstable(feature = "collections", - reason = "method got recently added")] - pub fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> { - core_str::StrExt::matches(&self[..], pat) + #[stable(feature = "split_whitespace", since = "1.1.0")] + pub fn split_whitespace(&self) -> SplitWhitespace { + UnicodeStr::split_whitespace(&self[..]) } - /// An iterator over the matches of a pattern within `self`, yielded in - /// reverse order. - /// - /// The pattern can be a simple `&str`, `char`, or a closure that - /// determines the split. - /// Additional libraries might provide more complex patterns like - /// regular expressions. - /// - /// # Iterator behavior - /// - /// The returned iterator requires that the pattern supports a - /// reverse search, - /// and it will be double ended if a forward/reverse search yields - /// the same elements. - /// - /// For iterating from the front, `matches()` can be used. + /// An iterator over the non-empty substrings of `self` which contain no whitespace, + /// and which are separated by any amount of whitespace. 
/// /// # Examples /// /// ``` - /// # #![feature(collections)] - /// let v: Vec<&str> = "abcXXXabcYYYabc".rmatches("abc").collect(); - /// assert_eq!(v, ["abc", "abc", "abc"]); + /// # #![feature(str_words)] + /// # #![allow(deprecated)] + /// let some_words = " Mary had\ta little \n\t lamb"; + /// let v: Vec<&str> = some_words.words().collect(); /// - /// let v: Vec<&str> = "1abc2abc3".rmatches(char::is_numeric).collect(); - /// assert_eq!(v, ["3", "2", "1"]); + /// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]); /// ``` - #[unstable(feature = "collections", - reason = "method got recently added")] - pub fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P> - where P::Searcher: ReverseSearcher<'a> - { - core_str::StrExt::rmatches(&self[..], pat) + #[deprecated(reason = "words() will be removed. Use split_whitespace() instead", + since = "1.1.0")] + #[unstable(feature = "str_words", + reason = "the precise algorithm to use is unclear")] + #[allow(deprecated)] + pub fn words(&self) -> Words { + UnicodeStr::words(&self[..]) } - /// An iterator over the start and end indices of the disjoint matches - /// of a pattern within `self`. - /// - /// For matches of `pat` within `self` that overlap, only the indices - /// corresponding to the first - /// match are returned. + /// An iterator over the lines of a string, separated by `\n`. /// - /// The pattern can be a simple `&str`, `char`, or a closure that - /// determines - /// the split. - /// Additional libraries might provide more complex patterns like - /// regular expressions. + /// This does not include the empty string after a trailing `\n`. /// - /// # Iterator behavior + /// # Examples /// - /// The returned iterator will be double ended if the pattern allows a - /// reverse search - /// and forward/reverse search yields the same elements. This is true for, - /// eg, `char` but not - /// for `&str`. 
+ /// ``` + /// let four_lines = "foo\nbar\n\nbaz"; + /// let v: Vec<&str> = four_lines.lines().collect(); /// - /// If the pattern allows a reverse search but its results might differ - /// from a forward search, `rmatch_indices()` can be used. + /// assert_eq!(v, ["foo", "bar", "", "baz"]); + /// ``` /// - /// # Examples + /// Leaving off the trailing character: /// /// ``` - /// # #![feature(collections)] - /// let v: Vec<(usize, usize)> = "abcXXXabcYYYabc".match_indices("abc").collect(); - /// assert_eq!(v, [(0, 3), (6, 9), (12, 15)]); - /// - /// let v: Vec<(usize, usize)> = "1abcabc2".match_indices("abc").collect(); - /// assert_eq!(v, [(1, 4), (4, 7)]); + /// let four_lines = "foo\nbar\n\nbaz\n"; + /// let v: Vec<&str> = four_lines.lines().collect(); /// - /// let v: Vec<(usize, usize)> = "ababa".match_indices("aba").collect(); - /// assert_eq!(v, [(0, 3)]); // only the first `aba` + /// assert_eq!(v, ["foo", "bar", "", "baz"]); /// ``` - #[unstable(feature = "collections", - reason = "might have its iterator type changed")] - // NB: Right now MatchIndices yields `(usize, usize)`, but it would - // be more consistent with `matches` and `char_indices` to return `(usize, &str)` - pub fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> { - core_str::StrExt::match_indices(&self[..], pat) + #[stable(feature = "rust1", since = "1.0.0")] + pub fn lines(&self) -> Lines { + core_str::StrExt::lines(&self[..]) } - /// An iterator over the start and end indices of the disjoint matches of - /// a pattern within - /// `self`, yielded in reverse order. - /// - /// For matches of `pat` within `self` that overlap, only the indices - /// corresponding to the last - /// match are returned. + /// An iterator over the lines of a string, separated by either + /// `\n` or `\r\n`. /// - /// The pattern can be a simple `&str`, `char`, or a closure that - /// determines - /// the split. 
- /// Additional libraries might provide more complex patterns like - /// regular expressions. + /// As with `.lines()`, this does not include an empty trailing line. /// - /// # Iterator behavior + /// # Examples /// - /// The returned iterator requires that the pattern supports a - /// reverse search, - /// and it will be double ended if a forward/reverse search yields - /// the same elements. + /// ``` + /// let four_lines = "foo\r\nbar\n\r\nbaz"; + /// let v: Vec<&str> = four_lines.lines_any().collect(); /// - /// For iterating from the front, `match_indices()` can be used. + /// assert_eq!(v, ["foo", "bar", "", "baz"]); + /// ``` /// - /// # Examples + /// Leaving off the trailing character: /// /// ``` - /// # #![feature(collections)] - /// let v: Vec<(usize, usize)> = "abcXXXabcYYYabc".rmatch_indices("abc").collect(); - /// assert_eq!(v, [(12, 15), (6, 9), (0, 3)]); - /// - /// let v: Vec<(usize, usize)> = "1abcabc2".rmatch_indices("abc").collect(); - /// assert_eq!(v, [(4, 7), (1, 4)]); + /// let four_lines = "foo\r\nbar\n\r\nbaz\n"; + /// let v: Vec<&str> = four_lines.lines_any().collect(); /// - /// let v: Vec<(usize, usize)> = "ababa".rmatch_indices("aba").collect(); - /// assert_eq!(v, [(2, 5)]); // only the last `aba` + /// assert_eq!(v, ["foo", "bar", "", "baz"]); /// ``` - #[unstable(feature = "collections", - reason = "might have its iterator type changed")] - // NB: Right now RMatchIndices yields `(usize, usize)`, but it would - // be more consistent with `rmatches` and `char_indices` to return `(usize, &str)` - pub fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P> - where P::Searcher: ReverseSearcher<'a> - { - core_str::StrExt::rmatch_indices(&self[..], pat) + #[stable(feature = "rust1", since = "1.0.0")] + pub fn lines_any(&self) -> LinesAny { + core_str::StrExt::lines_any(&self[..]) + } + + /// Returns an iterator over the string in Unicode Normalization Form D + /// (canonical decomposition). 
+ #[allow(deprecated)] + #[deprecated(reason = "use the crates.io `unicode-normalization` library instead", + since = "1.0.0")] + #[inline] + #[unstable(feature = "unicode", + reason = "this functionality may be replaced with a more generic \ + unicode crate on crates.io")] + pub fn nfd_chars(&self) -> Decompositions { + Decompositions { + iter: self[..].chars(), + buffer: Vec::new(), + sorted: false, + kind: Canonical + } + } + + /// Returns an iterator over the string in Unicode Normalization Form KD + /// (compatibility decomposition). + #[allow(deprecated)] + #[deprecated(reason = "use the crates.io `unicode-normalization` library instead", + since = "1.0.0")] + #[inline] + #[unstable(feature = "unicode", + reason = "this functionality may be replaced with a more generic \ + unicode crate on crates.io")] + pub fn nfkd_chars(&self) -> Decompositions { + Decompositions { + iter: self[..].chars(), + buffer: Vec::new(), + sorted: false, + kind: Compatible + } + } + + /// An Iterator over the string in Unicode Normalization Form C + /// (canonical decomposition followed by canonical composition). + #[allow(deprecated)] + #[deprecated(reason = "use the crates.io `unicode-normalization` library instead", + since = "1.0.0")] + #[inline] + #[unstable(feature = "unicode", + reason = "this functionality may be replaced with a more generic \ + unicode crate on crates.io")] + pub fn nfc_chars(&self) -> Recompositions { + Recompositions { + iter: self.nfd_chars(), + state: Composing, + buffer: VecDeque::new(), + composee: None, + last_ccc: None + } } - /// An iterator over the lines of a string, separated by `\n`. - /// - /// This does not include the empty string after a trailing `\n`. 
- /// - /// # Examples - /// - /// ``` - /// let four_lines = "foo\nbar\n\nbaz"; - /// let v: Vec<&str> = four_lines.lines().collect(); - /// - /// assert_eq!(v, ["foo", "bar", "", "baz"]); - /// ``` - /// - /// Leaving off the trailing character: - /// - /// ``` - /// let four_lines = "foo\nbar\n\nbaz\n"; - /// let v: Vec<&str> = four_lines.lines().collect(); - /// - /// assert_eq!(v, ["foo", "bar", "", "baz"]); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - pub fn lines(&self) -> Lines { - core_str::StrExt::lines(&self[..]) + /// An Iterator over the string in Unicode Normalization Form KC + /// (compatibility decomposition followed by canonical composition). + #[allow(deprecated)] + #[deprecated(reason = "use the crates.io `unicode-normalization` library instead", + since = "1.0.0")] + #[inline] + #[unstable(feature = "unicode", + reason = "this functionality may be replaced with a more generic \ + unicode crate on crates.io")] + pub fn nfkc_chars(&self) -> Recompositions { + Recompositions { + iter: self.nfkd_chars(), + state: Composing, + buffer: VecDeque::new(), + composee: None, + last_ccc: None + } } - /// An iterator over the lines of a string, separated by either - /// `\n` or `\r\n`. + /// Returns an iterator over the [grapheme clusters][graphemes] of `self`. /// - /// As with `.lines()`, this does not include an empty trailing line. + /// [graphemes]: http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries /// - /// # Examples + /// If `is_extended` is true, the iterator is over the + /// *extended grapheme clusters*; + /// otherwise, the iterator is over the *legacy grapheme clusters*. + /// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries) + /// recommends extended grapheme cluster boundaries for general processing. 
/// - /// ``` - /// let four_lines = "foo\r\nbar\n\r\nbaz"; - /// let v: Vec<&str> = four_lines.lines_any().collect(); + /// # Examples /// - /// assert_eq!(v, ["foo", "bar", "", "baz"]); /// ``` + /// # #![feature(unicode, core)] + /// let gr1 = "a\u{310}e\u{301}o\u{308}\u{332}".graphemes(true).collect::<Vec<&str>>(); + /// let b: &[_] = &["a\u{310}", "e\u{301}", "o\u{308}\u{332}"]; /// - /// Leaving off the trailing character: + /// assert_eq!(&gr1[..], b); /// - /// ``` - /// let four_lines = "foo\r\nbar\n\r\nbaz\n"; - /// let v: Vec<&str> = four_lines.lines_any().collect(); + /// let gr2 = "a\r\nb🇷🇺🇸🇹".graphemes(true).collect::<Vec<&str>>(); + /// let b: &[_] = &["a", "\r\n", "b", "🇷🇺🇸🇹"]; /// - /// assert_eq!(v, ["foo", "bar", "", "baz"]); + /// assert_eq!(&gr2[..], b); /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - pub fn lines_any(&self) -> LinesAny { - core_str::StrExt::lines_any(&self[..]) + #[deprecated(reason = "use the crates.io `unicode-segmentation` library instead", + since = "1.0.0")] + #[unstable(feature = "unicode", + reason = "this functionality may only be provided by libunicode")] + pub fn graphemes(&self, is_extended: bool) -> Graphemes { + UnicodeStr::graphemes(&self[..], is_extended) } - /// Returns a slice of the string from the character range [`begin`..`end`). - /// - /// That is, start at the `begin`-th code point of the string and continue - /// to the `end`-th code point. This does not detect or handle edge cases - /// such as leaving a combining character as the first code point of the - /// string. - /// - /// Due to the design of UTF-8, this operation is `O(end)`. Use slicing - /// syntax if you want to use byte indices rather than codepoint indices. - /// - /// # Panics - /// - /// Panics if `begin` > `end` or the either `begin` or `end` are beyond the - /// last character of the string. + + /// Returns an iterator over the grapheme clusters of `self` and their + /// byte offsets. See + /// `graphemes()` for more information.
/// /// # Examples /// /// ``` - /// # #![feature(collections)] - /// let s = "Löwe 老虎 Léopard"; + /// # #![feature(unicode, core)] + /// let gr_inds = "a̐éö̲\r\n".grapheme_indices(true).collect::<Vec<(usize, &str)>>(); + /// let b: &[_] = &[(0, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")]; /// - /// assert_eq!(s.slice_chars(0, 4), "Löwe"); - /// assert_eq!(s.slice_chars(5, 7), "老虎"); + /// assert_eq!(&gr_inds[..], b); /// ``` + #[deprecated(reason = "use the crates.io `unicode-segmentation` library instead", + since = "1.0.0")] + #[unstable(feature = "unicode", + reason = "this functionality may only be provided by libunicode")] + pub fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices { + UnicodeStr::grapheme_indices(&self[..], is_extended) + } + + /// Returns an iterator of `u16` over the string encoded as UTF-16. #[unstable(feature = "collections", - reason = "may have yet to prove its worth")] - pub fn slice_chars(&self, begin: usize, end: usize) -> &str { - core_str::StrExt::slice_chars(&self[..], begin, end) + reason = "this functionality may only be provided by libunicode")] + pub fn utf16_units(&self) -> Utf16Units { + Utf16Units { encoder: Utf16Encoder::new(self[..].chars()) } } - /// Takes a bytewise slice from a string. - /// - /// Returns the substring from [`begin`..`end`). - /// - /// # Unsafety - /// - /// Caller must check both UTF-8 character boundaries and the boundaries - /// of the entire slice as - /// well. + /// Returns `true` if `self` contains another `&str`.
/// /// # Examples /// /// ``` - /// let s = "Löwe 老虎 Léopard"; + /// assert!("bananas".contains("nana")); /// - /// unsafe { - /// assert_eq!(s.slice_unchecked(0, 21), "Löwe 老虎 Léopard"); - /// } + /// assert!(!"bananas".contains("foobar")); /// ``` #[stable(feature = "rust1", since = "1.0.0")] - pub unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str { - core_str::StrExt::slice_unchecked(&self[..], begin, end) + pub fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool { + core_str::StrExt::contains(&self[..], pat) } /// Returns `true` if the given `&str` is a prefix of the string. @@ -1169,404 +1087,524 @@ pub fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool core_str::StrExt::ends_with(&self[..], pat) } - /// Returns a string with all pre- and suffixes that match a pattern - /// repeatedly removed. + /// Returns the byte index of the first character of `self` that matches + /// the pattern, if it + /// exists. /// - /// The pattern can be a simple `char`, or a closure that determines - /// the split. + /// Returns `None` if it doesn't exist. + /// + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the + /// split. 
/// /// # Examples /// /// Simple patterns: /// /// ``` - /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar"); - /// assert_eq!("123foo1bar123".trim_matches(char::is_numeric), "foo1bar"); + /// let s = "Löwe 老虎 Léopard"; + /// + /// assert_eq!(s.find('L'), Some(0)); + /// assert_eq!(s.find('é'), Some(14)); + /// assert_eq!(s.find("Léopard"), Some(13)); /// + /// ``` + /// + /// More complex patterns with closures: + /// + /// ``` + /// let s = "Löwe 老虎 Léopard"; + /// + /// assert_eq!(s.find(char::is_whitespace), Some(5)); + /// assert_eq!(s.find(char::is_lowercase), Some(1)); + /// ``` + /// + /// Not finding the pattern: + /// + /// ``` + /// let s = "Löwe 老虎 Léopard"; /// let x: &[_] = &['1', '2']; - /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar"); + /// + /// assert_eq!(s.find(x), None); /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> { + core_str::StrExt::find(&self[..], pat) + } + + /// Returns the byte index of the last character of `self` that + /// matches the pattern, if it + /// exists. /// - /// A more complex pattern, using a closure: + /// Returns `None` if it doesn't exist. + /// + /// The pattern can be a simple `&str`, `char`, + /// or a closure that determines the split.
+ /// + /// # Examples + /// + /// Simple patterns: /// /// ``` - /// assert_eq!("1foo1barXX".trim_matches(|c| c == '1' || c == 'X'), "foo1bar"); + /// let s = "Löwe 老虎 Léopard"; + /// + /// assert_eq!(s.rfind('L'), Some(13)); + /// assert_eq!(s.rfind('é'), Some(14)); + /// ``` + /// + /// More complex patterns with closures: + /// + /// ``` + /// let s = "Löwe 老虎 Léopard"; + /// + /// assert_eq!(s.rfind(char::is_whitespace), Some(12)); + /// assert_eq!(s.rfind(char::is_lowercase), Some(20)); + /// ``` + /// + /// Not finding the pattern: + /// + /// ``` + /// let s = "Löwe 老虎 Léopard"; + /// let x: &[_] = &['1', '2']; + /// + /// assert_eq!(s.rfind(x), None); /// ``` #[stable(feature = "rust1", since = "1.0.0")] - pub fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str - where P::Searcher: DoubleEndedSearcher<'a> + pub fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> + where P::Searcher: ReverseSearcher<'a> { - core_str::StrExt::trim_matches(&self[..], pat) + core_str::StrExt::rfind(&self[..], pat) } - /// Returns a string with all prefixes that match a pattern - /// repeatedly removed. + /// An iterator over substrings of `self`, separated by characters + /// matched by a pattern. /// /// The pattern can be a simple `&str`, `char`, or a closure that /// determines the split. + /// Additional libraries might provide more complex patterns like + /// regular expressions. + /// + /// # Iterator behavior + /// + /// The returned iterator will be double ended if the pattern allows a + /// reverse search and forward/reverse search yields the same elements. + /// This is true for, eg, `char` but not + /// for `&str`. + /// + /// If the pattern allows a reverse search but its results might differ + /// from a forward search, `rsplit()` can be used.
/// /// # Examples /// + /// Simple patterns: + /// + /// ``` + /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect(); + /// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]); + /// + /// let v: Vec<&str> = "".split('X').collect(); + /// assert_eq!(v, [""]); + /// + /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect(); + /// assert_eq!(v, ["lion", "", "tiger", "leopard"]); + /// + /// let v: Vec<&str> = "lion::tiger::leopard".split("::").collect(); + /// assert_eq!(v, ["lion", "tiger", "leopard"]); + /// + /// let v: Vec<&str> = "abc1def2ghi".split(char::is_numeric).collect(); + /// assert_eq!(v, ["abc", "def", "ghi"]); + /// + /// let v: Vec<&str> = "lionXtigerXleopard".split(char::is_uppercase).collect(); + /// assert_eq!(v, ["lion", "tiger", "leopard"]); /// ``` - /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11"); - /// assert_eq!("123foo1bar123".trim_left_matches(char::is_numeric), "foo1bar123"); /// - /// let x: &[_] = &['1', '2']; - /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12"); + /// A more complex pattern, using a closure: + /// + /// ``` + /// let v: Vec<&str> = "abc1defXghi".split(|c| c == '1' || c == 'X').collect(); + /// assert_eq!(v, ["abc", "def", "ghi"]); /// ``` #[stable(feature = "rust1", since = "1.0.0")] - pub fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str { - core_str::StrExt::trim_left_matches(&self[..], pat) + pub fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> { + core_str::StrExt::split(&self[..], pat) } - /// Returns a string with all suffixes that match a pattern - /// repeatedly removed. + /// An iterator over substrings of `self`, separated by characters + /// matched by a pattern and yielded in reverse order. /// /// The pattern can be a simple `&str`, `char`, or a closure that /// determines the split. + /// Additional libraries might provide more complex patterns like + /// regular expressions. 
+ /// + /// # Iterator behavior + /// + /// The returned iterator requires that the pattern supports a + /// reverse search, + /// and it will be double ended if a forward/reverse search yields + /// the same elements. + /// + /// For iterating from the front, `split()` can be used. /// /// # Examples /// /// Simple patterns: /// - /// ``` - /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar"); - /// assert_eq!("123foo1bar123".trim_right_matches(char::is_numeric), "123foo1bar"); + /// ```rust + /// let v: Vec<&str> = "Mary had a little lamb".rsplit(' ').collect(); + /// assert_eq!(v, ["lamb", "little", "a", "had", "Mary"]); /// - /// let x: &[_] = &['1', '2']; - /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar"); + /// let v: Vec<&str> = "".rsplit('X').collect(); + /// assert_eq!(v, [""]); + /// + /// let v: Vec<&str> = "lionXXtigerXleopard".rsplit('X').collect(); + /// assert_eq!(v, ["leopard", "tiger", "", "lion"]); + /// + /// let v: Vec<&str> = "lion::tiger::leopard".rsplit("::").collect(); + /// assert_eq!(v, ["leopard", "tiger", "lion"]); /// ``` /// /// A more complex pattern, using a closure: /// /// ``` - /// assert_eq!("1fooX".trim_left_matches(|c| c == '1' || c == 'X'), "fooX"); + /// let v: Vec<&str> = "abc1defXghi".rsplit(|c| c == '1' || c == 'X').collect(); + /// assert_eq!(v, ["ghi", "def", "abc"]); /// ``` #[stable(feature = "rust1", since = "1.0.0")] - pub fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str + pub fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P> where P::Searcher: ReverseSearcher<'a> { - core_str::StrExt::trim_right_matches(&self[..], pat) + core_str::StrExt::rsplit(&self[..], pat) } - /// Checks that `index`-th byte lies at the start and/or end of a - /// UTF-8 code point sequence. - /// - /// The start and end of the string (when `index == self.len()`) are - /// considered to be - /// boundaries. 
- /// - /// # Panics - /// - /// Panics if `index` is greater than `self.len()`. - /// - /// # Examples + /// An iterator over substrings of `self`, separated by characters + /// matched by a pattern. /// - /// ``` - /// # #![feature(str_char)] - /// let s = "Löwe 老虎 Léopard"; - /// assert!(s.is_char_boundary(0)); - /// // start of `老` - /// assert!(s.is_char_boundary(6)); - /// assert!(s.is_char_boundary(s.len())); + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the split. + /// Additional libraries might provide more complex patterns + /// like regular expressions. /// - /// // second byte of `ö` - /// assert!(!s.is_char_boundary(2)); + /// Equivalent to `split`, except that the trailing substring + /// is skipped if empty. /// - /// // third byte of `老` - /// assert!(!s.is_char_boundary(8)); - /// ``` - #[unstable(feature = "str_char", - reason = "it is unclear whether this method pulls its weight \ - with the existence of the char_indices iterator or \ - this method may want to be replaced with checked \ - slicing")] - pub fn is_char_boundary(&self, index: usize) -> bool { - core_str::StrExt::is_char_boundary(&self[..], index) - } - - /// Given a byte position, return the next char and its index. + /// This method can be used for string data that is _terminated_, + /// rather than _separated_ by a pattern. /// - /// This can be used to iterate over the Unicode characters of a string. + /// # Iterator behavior /// - /// # Panics + /// The returned iterator will be double ended if the pattern allows a + /// reverse search + /// and forward/reverse search yields the same elements. This is true + /// for, eg, `char` but not for `&str`. /// - /// If `i` is greater than or equal to the length of the string. - /// If `i` is not the index of the beginning of a valid UTF-8 character. + /// If the pattern allows a reverse search but its results might differ + /// from a forward search, `rsplit_terminator()` can be used. 
/// /// # Examples /// - /// This example manually iterates through the characters of a string; - /// this should normally be - /// done by `.chars()` or `.char_indices()`. - /// - /// ``` - /// # #![feature(str_char, core)] - /// use std::str::CharRange; - /// - /// let s = "中华Việt Nam"; - /// let mut i = 0; - /// while i < s.len() { - /// let CharRange {ch, next} = s.char_range_at(i); - /// println!("{}: {}", i, ch); - /// i = next; - /// } /// ``` + /// let v: Vec<&str> = "A.B.".split_terminator('.').collect(); + /// assert_eq!(v, ["A", "B"]); /// - /// This outputs: - /// - /// ```text - /// 0: 中 - /// 3: 华 - /// 6: V - /// 7: i - /// 8: ệ - /// 11: t - /// 12: - /// 13: N - /// 14: a - /// 15: m + /// let v: Vec<&str> = "A..B..".split_terminator(".").collect(); + /// assert_eq!(v, ["A", "", "B", ""]); /// ``` - #[unstable(feature = "str_char", - reason = "often replaced by char_indices, this method may \ - be removed in favor of just char_at() or eventually \ - removed altogether")] - pub fn char_range_at(&self, start: usize) -> CharRange { - core_str::StrExt::char_range_at(&self[..], start) + #[stable(feature = "rust1", since = "1.0.0")] + pub fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> { + core_str::StrExt::split_terminator(&self[..], pat) } - /// Given a byte position, return the previous `char` and its position. + /// An iterator over substrings of `self`, separated by characters + /// matched by a pattern and yielded in reverse order. /// - /// This function can be used to iterate over a Unicode string in reverse. + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the split. + /// Additional libraries might provide more complex patterns like + /// regular expressions. /// - /// Returns 0 for next index if called on start index 0. + /// Equivalent to `split`, except that the trailing substring is + /// skipped if empty. 
/// - /// # Panics + /// This method can be used for string data that is _terminated_, + /// rather than _separated_ by a pattern. /// - /// If `i` is greater than the length of the string. - /// If `i` is not an index following a valid UTF-8 character. + /// # Iterator behavior /// - /// # Examples + /// The returned iterator requires that the pattern supports a + /// reverse search, and it will be double ended if a forward/reverse + /// search yields the same elements. /// - /// This example manually iterates through the characters of a string; - /// this should normally be - /// done by `.chars().rev()` or `.char_indices()`. + /// For iterating from the front, `split_terminator()` can be used. /// - /// ``` - /// # #![feature(str_char, core)] - /// use std::str::CharRange; + /// # Examples /// - /// let s = "中华Việt Nam"; - /// let mut i = s.len(); - /// while i > 0 { - /// let CharRange {ch, next} = s.char_range_at_reverse(i); - /// println!("{}: {}", i, ch); - /// i = next; - /// } /// ``` + /// let v: Vec<&str> = "A.B.".rsplit_terminator('.').collect(); + /// assert_eq!(v, ["B", "A"]); /// - /// This outputs: - /// - /// ```text - /// 16: m - /// 15: a - /// 14: N - /// 13: - /// 12: t - /// 11: ệ - /// 8: i - /// 7: V - /// 6: 华 - /// 3: 中 + /// let v: Vec<&str> = "A..B..".rsplit_terminator(".").collect(); + /// assert_eq!(v, ["", "B", "", "A"]); /// ``` - #[unstable(feature = "str_char", - reason = "often replaced by char_indices, this method may \ - be removed in favor of just char_at_reverse() or \ - eventually removed altogether")] - pub fn char_range_at_reverse(&self, start: usize) -> CharRange { - core_str::StrExt::char_range_at_reverse(&self[..], start) + #[stable(feature = "rust1", since = "1.0.0")] + pub fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P> + where P::Searcher: ReverseSearcher<'a> + { + core_str::StrExt::rsplit_terminator(&self[..], pat) } - /// Given a byte position, return the `char` at that 
position. - /// - /// # Panics - /// - /// If `i` is greater than or equal to the length of the string. - /// If `i` is not the index of the beginning of a valid UTF-8 character. + /// An iterator over substrings of `self`, separated by a pattern, + /// restricted to returning + /// at most `count` items. /// - /// # Examples + /// The last element returned, if any, will contain the remainder of the + /// string. + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the split. + /// Additional libraries might provide more complex patterns like + /// regular expressions. /// - /// ``` - /// # #![feature(str_char)] - /// let s = "abπc"; - /// assert_eq!(s.char_at(1), 'b'); - /// assert_eq!(s.char_at(2), 'π'); - /// ``` - #[unstable(feature = "str_char", - reason = "frequently replaced by the chars() iterator, this \ - method may be removed or possibly renamed in the \ - future; it is normally replaced by chars/char_indices \ - iterators or by getting the first char from a \ - subslice")] - pub fn char_at(&self, i: usize) -> char { - core_str::StrExt::char_at(&self[..], i) - } - - /// Given a byte position, return the `char` at that position, counting - /// from the end. + /// # Iterator behavior /// - /// # Panics + /// The returned iterator will not be double ended, because it is + /// not efficient to support. /// - /// If `i` is greater than the length of the string. - /// If `i` is not an index following a valid UTF-8 character. + /// If the pattern allows a reverse search, `rsplitn()` can be used. 
/// /// # Examples /// + /// Simple patterns: + /// /// ``` - /// # #![feature(str_char)] - /// let s = "abπc"; - /// assert_eq!(s.char_at_reverse(1), 'a'); - /// assert_eq!(s.char_at_reverse(2), 'b'); + /// let v: Vec<&str> = "Mary had a little lambda".splitn(3, ' ').collect(); + /// assert_eq!(v, ["Mary", "had", "a little lambda"]); + /// + /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(3, "X").collect(); + /// assert_eq!(v, ["lion", "", "tigerXleopard"]); + /// + /// let v: Vec<&str> = "abcXdef".splitn(1, 'X').collect(); + /// assert_eq!(v, ["abcXdef"]); + /// + /// let v: Vec<&str> = "".splitn(1, 'X').collect(); + /// assert_eq!(v, [""]); /// ``` - #[unstable(feature = "str_char", - reason = "see char_at for more details, but reverse semantics \ - are also somewhat unclear, especially with which \ - cases generate panics")] - pub fn char_at_reverse(&self, i: usize) -> char { - core_str::StrExt::char_at_reverse(&self[..], i) - } - - /// Converts `self` to a byte slice. /// - /// # Examples + /// A more complex pattern, using a closure: /// /// ``` - /// assert_eq!("bors".as_bytes(), b"bors"); + /// let v: Vec<&str> = "abc1defXghi".splitn(2, |c| c == '1' || c == 'X').collect(); + /// assert_eq!(v, ["abc", "defXghi"]); /// ``` #[stable(feature = "rust1", since = "1.0.0")] - #[inline(always)] - pub fn as_bytes(&self) -> &[u8] { - core_str::StrExt::as_bytes(&self[..]) + pub fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> { + core_str::StrExt::splitn(&self[..], count, pat) } - /// Returns the byte index of the first character of `self` that matches - /// the pattern, if it - /// exists. + /// An iterator over substrings of `self`, separated by a pattern, + /// starting from the end of the string, restricted to returning + /// at most `count` items. /// - /// Returns `None` if it doesn't exist. + /// The last element returned, if any, will contain the remainder of the + /// string. 
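/// /// The pattern can be a simple `&str`, `char`, or a closure that - /// determines the - /// split. + /// determines the split. + /// Additional libraries might provide more complex patterns like + /// regular expressions. + /// + /// # Iterator behavior + /// + /// The returned iterator will not be double ended, because it is not + /// efficient to support. + /// + /// `splitn()` can be used for splitting from the front. /// /// # Examples /// /// Simple patterns: /// /// ``` - /// let s = "Löwe 老虎 Léopard"; + /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(3, ' ').collect(); + /// assert_eq!(v, ["lamb", "little", "Mary had a"]); /// - /// assert_eq!(s.find('L'), Some(0)); - /// assert_eq!(s.find('é'), Some(14)); - /// assert_eq!(s.find("Léopard"), Some(13)); + /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(3, 'X').collect(); + /// assert_eq!(v, ["leopard", "tiger", "lionX"]); /// + /// let v: Vec<&str> = "lion::tiger::leopard".rsplitn(2, "::").collect(); + /// assert_eq!(v, ["leopard", "lion::tiger"]); /// ``` /// - /// More complex patterns with closures: + /// A more complex pattern, using a closure: /// /// ``` - /// let s = "Löwe 老虎 Léopard"; - /// - /// assert_eq!(s.find(char::is_whitespace), Some(5)); - /// assert_eq!(s.find(char::is_lowercase), Some(1)); + /// let v: Vec<&str> = "abc1defXghi".rsplitn(2, |c| c == '1' || c == 'X').collect(); + /// assert_eq!(v, ["ghi", "abc1def"]); /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P> + where P::Searcher: ReverseSearcher<'a> + { + core_str::StrExt::rsplitn(&self[..], count, pat) + } + + /// An iterator over the matches of a pattern within `self`. /// - /// Not finding the pattern: + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the split. + /// Additional libraries might provide more complex patterns like + /// regular expressions. + /// + /// # Iterator behavior + /// + /// The returned iterator will be double ended if the pattern allows + /// a reverse search + /// and forward/reverse search yields the same elements. This is true + /// for, eg, `char` but not + /// for `&str`. + /// + /// If the pattern allows a reverse search but its results might differ + /// from a forward search, `rmatches()` can be used. + /// + /// # Examples /// /// ``` - /// let s = "Löwe 老虎 Léopard"; - /// let x: &[_] = &['1', '2']; + /// # #![feature(collections)] + /// let v: Vec<&str> = "abcXXXabcYYYabc".matches("abc").collect(); + /// assert_eq!(v, ["abc", "abc", "abc"]); /// - /// assert_eq!(s.find(x), None); + /// let v: Vec<&str> = "1abc2abc3".matches(char::is_numeric).collect(); + /// assert_eq!(v, ["1", "2", "3"]); /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - pub fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> { - core_str::StrExt::find(&self[..], pat) + #[unstable(feature = "collections", + reason = "method got recently added")] + pub fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> { + core_str::StrExt::matches(&self[..], pat) } - /// Returns the byte index of the last character of `self` that - /// matches the pattern, if it - /// exists. + /// An iterator over the matches of a pattern within `self`, yielded in + /// reverse order. /// - /// Returns `None` if it doesn't exist. + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the split. + /// Additional libraries might provide more complex patterns like + /// regular expressions. /// - /// The pattern can be a simple `&str`, `char`, - /// or a closure that determines the split. + /// # Iterator behavior /// - /// # Examples + /// The returned iterator requires that the pattern supports a + /// reverse search, + /// and it will be double ended if a forward/reverse search yields + /// the same elements.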
+ /// + /// # Iterator behavior + /// + /// The returned iterator will be double ended if the pattern allows + /// a reverse search + /// and forward/reverse search yields the same elements. This is true + /// for, eg, `char` but not + /// for `&str`. + /// + /// If the pattern allows a reverse search but its results might differ + /// from a forward search, `rmatches()` can be used. + /// + /// # Examples /// /// ``` - /// let s = "Löwe 老虎 Léopard"; - /// let x: &[_] = &['1', '2']; + /// # #![feature(collections)] + /// let v: Vec<&str> = "abcXXXabcYYYabc".matches("abc").collect(); + /// assert_eq!(v, ["abc", "abc", "abc"]); /// - /// assert_eq!(s.find(x), None); + /// let v: Vec<&str> = "1abc2abc3".matches(char::is_numeric).collect(); + /// assert_eq!(v, ["1", "2", "3"]); /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - pub fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option { - core_str::StrExt::find(&self[..], pat) + #[unstable(feature = "collections", + reason = "method got recently added")] + pub fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> { + core_str::StrExt::matches(&self[..], pat) } - /// Returns the byte index of the last character of `self` that - /// matches the pattern, if it - /// exists. + /// An iterator over the matches of a pattern within `self`, yielded in + /// reverse order. /// - /// Returns `None` if it doesn't exist. + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the split. + /// Additional libraries might provide more complex patterns like + /// regular expressions. /// - /// The pattern can be a simple `&str`, `char`, - /// or a closure that determines the split. + /// # Iterator behavior /// - /// # Examples + /// The returned iterator requires that the pattern supports a + /// reverse search, + /// and it will be double ended if a forward/reverse search yields + /// the same elements. 
/// - /// Simple patterns: + /// For iterating from the front, `matches()` can be used. + /// + /// # Examples /// /// ``` - /// let s = "Löwe 老虎 Léopard"; + /// # #![feature(collections)] + /// let v: Vec<&str> = "abcXXXabcYYYabc".rmatches("abc").collect(); + /// assert_eq!(v, ["abc", "abc", "abc"]); /// - /// assert_eq!(s.rfind('L'), Some(13)); - /// assert_eq!(s.rfind('é'), Some(14)); + /// let v: Vec<&str> = "1abc2abc3".rmatches(char::is_numeric).collect(); + /// assert_eq!(v, ["3", "2", "1"]); /// ``` + #[unstable(feature = "collections", + reason = "method got recently added")] + pub fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P> + where P::Searcher: ReverseSearcher<'a> + { + core_str::StrExt::rmatches(&self[..], pat) + } + + /// An iterator over the start and end indices of the disjoint matches + /// of a pattern within `self`. /// - /// More complex patterns with closures: + /// For matches of `pat` within `self` that overlap, only the indices + /// corresponding to the first + /// match are returned. /// - /// ``` - /// let s = "Löwe 老虎 Léopard"; + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines + /// the split. + /// Additional libraries might provide more complex patterns like + /// regular expressions. /// - /// assert_eq!(s.rfind(char::is_whitespace), Some(12)); - /// assert_eq!(s.rfind(char::is_lowercase), Some(20)); - /// ``` + /// # Iterator behavior /// - /// Not finding the pattern: + /// The returned iterator will be double ended if the pattern allows a + /// reverse search + /// and forward/reverse search yields the same elements. This is true for, + /// eg, `char` but not + /// for `&str`. + /// + /// If the pattern allows a reverse search but its results might differ + /// from a forward search, `rmatch_indices()` can be used.
+ /// + /// # Examples /// /// ``` - /// let s = "Löwe 老虎 Léopard"; - /// let x: &[_] = &['1', '2']; + /// # #![feature(collections)] + /// let v: Vec<(usize, usize)> = "abcXXXabcYYYabc".match_indices("abc").collect(); + /// assert_eq!(v, [(0, 3), (6, 9), (12, 15)]); /// - /// assert_eq!(s.rfind(x), None); + /// let v: Vec<(usize, usize)> = "1abcabc2".match_indices("abc").collect(); + /// assert_eq!(v, [(1, 4), (4, 7)]); + /// + /// let v: Vec<(usize, usize)> = "ababa".match_indices("aba").collect(); + /// assert_eq!(v, [(0, 3)]); // only the first `aba` /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - pub fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option - where P::Searcher: ReverseSearcher<'a> - { - core_str::StrExt::rfind(&self[..], pat) + #[unstable(feature = "collections", + reason = "might have its iterator type changed")] + // NB: Right now MatchIndices yields `(usize, usize)`, but it would + // be more consistent with `matches` and `char_indices` to return `(usize, &str)` + pub fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> { + core_str::StrExt::match_indices(&self[..], pat) } - /// Retrieves the first character from a `&str` and returns it. + /// An iterator over the start and end indices of the disjoint matches of + /// a pattern within + /// `self`, yielded in reverse order. /// - /// This does not allocate a new string; instead, it returns a slice that - /// points one character - /// beyond the character that was shifted. + /// For matches of `pat` within `self` that overlap, only the indices + /// corresponding to the last + /// match are returned. /// - /// If the slice does not contain any characters, None is returned instead. + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines + /// the split. + /// Additional libraries might provide more complex patterns like + /// regular expressions. 
+ /// + /// # Iterator behavior + /// + /// The returned iterator requires that the pattern supports a + /// reverse search, + /// and it will be double ended if a forward/reverse search yields + /// the same elements. + /// + /// For iterating from the front, `match_indices()` can be used. /// /// # Examples /// /// ``` - /// # #![feature(str_char)] - /// let s = "Löwe 老虎 Léopard"; - /// let (c, s1) = s.slice_shift_char().unwrap(); - /// - /// assert_eq!(c, 'L'); - /// assert_eq!(s1, "öwe 老虎 Léopard"); + /// # #![feature(collections)] + /// let v: Vec<(usize, usize)> = "abcXXXabcYYYabc".rmatch_indices("abc").collect(); + /// assert_eq!(v, [(12, 15), (6, 9), (0, 3)]); /// - /// let (c, s2) = s1.slice_shift_char().unwrap(); + /// let v: Vec<(usize, usize)> = "1abcabc2".rmatch_indices("abc").collect(); + /// assert_eq!(v, [(4, 7), (1, 4)]); /// - /// assert_eq!(c, 'ö'); - /// assert_eq!(s2, "we 老虎 Léopard"); + /// let v: Vec<(usize, usize)> = "ababa".rmatch_indices("aba").collect(); + /// assert_eq!(v, [(2, 5)]); // only the last `aba` /// ``` - #[unstable(feature = "str_char", - reason = "awaiting conventions about shifting and slices and \ - may not be warranted with the existence of the chars \ - and/or char_indices iterators")] - pub fn slice_shift_char(&self) -> Option<(char, &str)> { - core_str::StrExt::slice_shift_char(&self[..]) + #[unstable(feature = "collections", + reason = "might have its iterator type changed")] + // NB: Right now RMatchIndices yields `(usize, usize)`, but it would + // be more consistent with `rmatches` and `char_indices` to return `(usize, &str)` + pub fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P> + where P::Searcher: ReverseSearcher<'a> + { + core_str::StrExt::rmatch_indices(&self[..], pat) } /// Returns the byte offset of an inner slice relative to an enclosing @@ -1593,230 +1631,179 @@ pub fn subslice_offset(&self, inner: &str) -> usize { core_str::StrExt::subslice_offset(&self[..], inner) } - /// 
Returns an unsafe pointer to the `&str`'s buffer. - /// - /// The caller must ensure that the string outlives this pointer, and - /// that it is not - /// reallocated (e.g. by pushing to the string). + /// Returns a `&str` with leading and trailing whitespace removed. /// /// # Examples /// /// ``` - /// let s = "Hello"; - /// let p = s.as_ptr(); + /// let s = " Hello\tworld\t"; + /// assert_eq!(s.trim(), "Hello\tworld"); /// ``` #[stable(feature = "rust1", since = "1.0.0")] - #[inline] - pub fn as_ptr(&self) -> *const u8 { - core_str::StrExt::as_ptr(&self[..]) - } - - /// Returns an iterator of `u16` over the string encoded as UTF-16. - #[unstable(feature = "collections", - reason = "this functionality may only be provided by libunicode")] - pub fn utf16_units(&self) -> Utf16Units { - Utf16Units { encoder: Utf16Encoder::new(self[..].chars()) } + pub fn trim(&self) -> &str { + UnicodeStr::trim(&self[..]) } - /// Returns the length of `self` in bytes. + /// Returns a `&str` with leading whitespace removed. /// /// # Examples /// /// ``` - /// assert_eq!("foo".len(), 3); - /// assert_eq!("ƒoo".len(), 4); // fancy f! + /// let s = " Hello\tworld\t"; + /// assert_eq!(s.trim_left(), "Hello\tworld\t"); /// ``` #[stable(feature = "rust1", since = "1.0.0")] - #[inline] - pub fn len(&self) -> usize { - core_str::StrExt::len(&self[..]) + pub fn trim_left(&self) -> &str { + UnicodeStr::trim_left(&self[..]) } - /// Returns true if this slice has a length of zero bytes. + /// Returns a `&str` with trailing whitespace removed. /// /// # Examples /// /// ``` - /// assert!("".is_empty()); + /// let s = " Hello\tworld\t"; + /// assert_eq!(s.trim_right(), " Hello\tworld"); /// ``` - #[inline] #[stable(feature = "rust1", since = "1.0.0")] - pub fn is_empty(&self) -> bool { - core_str::StrExt::is_empty(&self[..]) + pub fn trim_right(&self) -> &str { + UnicodeStr::trim_right(&self[..]) } - /// Parses `self` into the specified type. 
+ /// Returns a string with all pre- and suffixes that match a pattern + /// repeatedly removed. /// - /// # Failure + /// The pattern can be a simple `char`, or a closure that determines + /// the split. /// - /// Will return `Err` if it's not possible to parse `self` into the type. + /// # Examples /// - /// # Example + /// Simple patterns: /// /// ``` - /// assert_eq!("4".parse::<u32>(), Ok(4)); + /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar"); + /// assert_eq!("123foo1bar123".trim_matches(char::is_numeric), "foo1bar"); + /// + /// let x: &[_] = &['1', '2']; + /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar"); /// ``` /// - /// Failing: + /// A more complex pattern, using a closure: /// /// ``` - /// assert!("j".parse::<u32>().is_err()); + /// assert_eq!("1foo1barXX".trim_matches(|c| c == '1' || c == 'X'), "foo1bar"); /// ``` - #[inline] #[stable(feature = "rust1", since = "1.0.0")] - pub fn parse<F: FromStr>(&self) -> Result<F, F::Err> { - core_str::StrExt::parse(&self[..]) + pub fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str + where P::Searcher: DoubleEndedSearcher<'a> + { + core_str::StrExt::trim_matches(&self[..], pat) } - /// Returns an iterator over the [grapheme clusters][graphemes] of `self`. - /// - /// [graphemes]: http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries + /// Returns a string with all prefixes that match a pattern + /// repeatedly removed. /// - /// If `is_extended` is true, the iterator is over the - /// *extended grapheme clusters*; - /// otherwise, the iterator is over the *legacy grapheme clusters*. - /// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries) - /// recommends extended grapheme cluster boundaries for general processing. + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the split.
/// /// # Examples /// /// ``` - /// # #![feature(unicode, core)] - /// let gr1 = "a\u{310}e\u{301}o\u{308}\u{332}".graphemes(true).collect::<Vec<&str>>(); - /// let b: &[_] = &["a\u{310}", "e\u{301}", "o\u{308}\u{332}"]; - /// - /// assert_eq!(&gr1[..], b); - /// - /// let gr2 = "a\r\nb🇷🇺🇸🇹".graphemes(true).collect::<Vec<&str>>(); - /// let b: &[_] = &["a", "\r\n", "b", "🇷🇺🇸🇹"]; + /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11"); + /// assert_eq!("123foo1bar123".trim_left_matches(char::is_numeric), "foo1bar123"); /// - /// assert_eq!(&gr2[..], b); + /// let x: &[_] = &['1', '2']; + /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12"); /// ``` - #[deprecated(reason = "use the crates.io `unicode-segmentation` library instead", - since = "1.0.0")] - #[unstable(feature = "unicode", - reason = "this functionality may only be provided by libunicode")] - pub fn graphemes(&self, is_extended: bool) -> Graphemes { - UnicodeStr::graphemes(&self[..], is_extended) + #[stable(feature = "rust1", since = "1.0.0")] + pub fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str { + core_str::StrExt::trim_left_matches(&self[..], pat) } - /// Returns an iterator over the grapheme clusters of `self` and their - /// byte offsets. See - /// `graphemes()` for more information. + /// Returns a string with all suffixes that match a pattern + /// repeatedly removed. + /// + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the split.
/// /// # Examples /// + /// Simple patterns: + /// /// ``` - /// # #![feature(unicode, core)] - /// let gr_inds = "a̐éö̲\r\n".grapheme_indices(true).collect::<Vec<(usize, &str)>>(); - /// let b: &[_] = &[(0, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")]; + /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar"); + /// assert_eq!("123foo1bar123".trim_right_matches(char::is_numeric), "123foo1bar"); /// - /// assert_eq!(&gr_inds[..], b); + /// let x: &[_] = &['1', '2']; + /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar"); /// ``` - #[deprecated(reason = "use the crates.io `unicode-segmentation` library instead", - since = "1.0.0")] - #[unstable(feature = "unicode", - reason = "this functionality may only be provided by libunicode")] - pub fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices { - UnicodeStr::grapheme_indices(&self[..], is_extended) - } - - /// An iterator over the non-empty substrings of `self` which contain no whitespace, - /// and which are separated by any amount of whitespace. /// - /// # Examples + /// A more complex pattern, using a closure: /// /// ``` - /// # #![feature(str_words)] - /// # #![allow(deprecated)] - /// let some_words = " Mary had\ta little \n\t lamb"; - /// let v: Vec<&str> = some_words.words().collect(); - /// - /// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]); + /// assert_eq!("1fooX".trim_right_matches(|c| c == '1' || c == 'X'), "1foo"); /// ``` - #[deprecated(reason = "words() will be removed.
Use split_whitespace() instead", - since = "1.1.0")] - #[unstable(feature = "str_words", - reason = "the precise algorithm to use is unclear")] - #[allow(deprecated)] - pub fn words(&self) -> Words { - UnicodeStr::words(&self[..]) + #[stable(feature = "rust1", since = "1.0.0")] + pub fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str + where P::Searcher: ReverseSearcher<'a> + { + core_str::StrExt::trim_right_matches(&self[..], pat) } - /// An iterator over the non-empty substrings of `self` which contain no whitespace, - /// and which are separated by any amount of whitespace. - /// - /// # Examples + /// Parses `self` into the specified type. /// - /// ``` - /// let some_words = " Mary had\ta little \n\t lamb"; - /// let v: Vec<&str> = some_words.split_whitespace().collect(); + /// # Failure /// - /// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]); - /// ``` - #[stable(feature = "split_whitespace", since = "1.1.0")] - pub fn split_whitespace(&self) -> SplitWhitespace { - UnicodeStr::split_whitespace(&self[..]) - } - - /// Returns a string's displayed width in columns. + /// Will return `Err` if it's not possible to parse `self` into the type. /// - /// Control characters have zero width. + /// # Example /// - /// `is_cjk` determines behavior for characters in the Ambiguous category: - /// if `is_cjk` is - /// `true`, these are 2 columns wide; otherwise, they are 1. - /// In CJK locales, `is_cjk` should be - /// `true`, else it should be `false`. - /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/) - /// recommends that these - /// characters be treated as 1 column (i.e., `is_cjk = false`) if the - /// locale is unknown. 
- #[deprecated(reason = "use the crates.io `unicode-width` library instead", - since = "1.0.0")] - #[unstable(feature = "unicode", - reason = "this functionality may only be provided by libunicode")] - pub fn width(&self, is_cjk: bool) -> usize { - UnicodeStr::width(&self[..], is_cjk) - } - - /// Returns a `&str` with leading and trailing whitespace removed. + /// ``` + /// assert_eq!("4".parse::<u32>(), Ok(4)); + /// ``` /// - /// # Examples + /// Failing: /// /// ``` - /// let s = " Hello\tworld\t"; - /// assert_eq!(s.trim(), "Hello\tworld"); + /// assert!("j".parse::<u32>().is_err()); /// ``` + #[inline] #[stable(feature = "rust1", since = "1.0.0")] - pub fn trim(&self) -> &str { - UnicodeStr::trim(&self[..]) + pub fn parse<F: FromStr>(&self) -> Result<F, F::Err> { + core_str::StrExt::parse(&self[..]) } - /// Returns a `&str` with leading whitespace removed. + /// Replaces all occurrences of one string with another. + /// + /// `replace` takes two arguments, a sub-`&str` to find in `self`, and a + /// second `&str` to + /// replace it with. If the original `&str` isn't found, no change occurs. /// /// # Examples /// /// ``` - /// let s = " Hello\tworld\t"; - /// assert_eq!(s.trim_left(), "Hello\tworld\t"); + /// let s = "this is old"; + /// + /// assert_eq!(s.replace("old", "new"), "this is new"); /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - pub fn trim_left(&self) -> &str { - UnicodeStr::trim_left(&self[..]) - } - - /// Returns a `&str` with trailing whitespace removed.
/// - /// # Examples + /// When a `&str` isn't found: /// /// ``` - /// let s = " Hello\tworld\t"; - /// assert_eq!(s.trim_right(), " Hello\tworld"); + /// let s = "this is old"; + /// assert_eq!(s.replace("cookie monster", "little lamb"), s); /// ``` #[stable(feature = "rust1", since = "1.0.0")] - pub fn trim_right(&self) -> &str { - UnicodeStr::trim_right(&self[..]) + pub fn replace(&self, from: &str, to: &str) -> String { + let mut result = String::new(); + let mut last_end = 0; + for (start, end) in self.match_indices(from) { + result.push_str(unsafe { self.slice_unchecked(last_end, start) }); + result.push_str(to); + last_end = end; + } + result.push_str(unsafe { self.slice_unchecked(last_end, self.len()) }); + result } /// Returns the lowercase equivalent of this string. @@ -1852,4 +1839,18 @@ pub fn to_uppercase(&self) -> String { s.extend(self[..].chars().flat_map(|c| c.to_uppercase())); return s; } + + /// Escapes each char in `s` with `char::escape_default`. + #[unstable(feature = "collections", + reason = "return type may change to be an iterator")] + pub fn escape_default(&self) -> String { + self.chars().flat_map(|c| c.escape_default()).collect() + } + + /// Escapes each char in `s` with `char::escape_unicode`. + #[unstable(feature = "collections", + reason = "return type may change to be an iterator")] + pub fn escape_unicode(&self) -> String { + self.chars().flat_map(|c| c.escape_unicode()).collect() + } } -- 2.44.0