use self::RecompositionState::*;
use self::DecompositionType::*;
+use core::prelude::*;
+
use core::borrow::{BorrowFrom, Cow, ToOwned};
-use core::clone::Clone;
+use core::cmp::{mod, Equiv, PartialEq, Eq, PartialOrd, Ord, Ordering};
use core::default::Default;
use core::fmt;
use core::hash;
-use core::char::Char;
-use core::cmp::{mod, Eq, Equiv, Ord, Ordering, PartialEq, PartialOrd};
-use core::iter::{range, AdditiveIterator, Iterator, IteratorExt};
-use core::kinds::Sized;
-use core::option::Option::{mod, Some, None};
-use core::slice::{AsSlice, SliceExt};
+use core::iter::AdditiveIterator;
+use core::iter::{mod, range, Iterator, IteratorExt};
+use core::str as core_str;
+use unicode::str::{UnicodeStr, Utf16Encoder};
use ring_buf::RingBuf;
-use string::String;
+use string::{String, ToString};
use unicode;
use vec::Vec;
-pub use core::str::{from_utf8, CharEq, Chars, CharOffsets};
+pub use core::str::{from_utf8, CharEq, Chars, CharIndices};
pub use core::str::{Bytes, CharSplits};
-pub use core::str::{CharSplitsN, AnyLines, MatchIndices, StrSplits};
-pub use core::str::{Utf16Encoder, Utf16CodeUnits};
-pub use core::str::{eq_slice, is_utf8, is_utf16, Utf16Items};
-pub use core::str::{Utf16Item, ScalarValue, LoneSurrogate, utf16_items};
-pub use core::str::{truncate_utf16_at_nul, utf8_char_width, CharRange};
-pub use core::str::{FromStr, from_str};
-pub use core::str::{Str, StrPrelude};
+pub use core::str::{CharSplitsN, Lines, LinesAny, MatchIndices, StrSplits};
+pub use core::str::{CharRange};
+pub use core::str::{FromStr, from_str, Utf8Error};
+pub use core::str::Str;
pub use core::str::{from_utf8_unchecked, from_c_str};
-pub use unicode::str::{UnicodeStrPrelude, Words, Graphemes, GraphemeIndices};
+pub use unicode::str::{Words, Graphemes, GraphemeIndices};
// FIXME(conventions): ensure bit/char conventions are followed by str's API
*/
/// Methods for vectors of strings.
+#[unstable = "functionality may be replaced with iterators"]
pub trait StrVector for Sized? {
/// Concatenates a vector of strings.
///
fn connect(&self, sep: &str) -> String;
}
+#[allow(deprecated)]
impl<S: Str> StrVector for [S] {
fn concat(&self) -> String {
if self.is_empty() {
let mut result = String::with_capacity(len);
for s in self.iter() {
- result.push_str(s.as_slice())
+ result.push_str(s.as_slice());
}
result
}
}
+/// External iterator for a string's UTF-16 code units.
+/// Use with the `std::iter` module.
+#[deriving(Clone)]
+pub struct Utf16Units<'a> {
+ encoder: Utf16Encoder<Chars<'a>>
+}
+
+impl<'a> Iterator<u16> for Utf16Units<'a> {
+ #[inline]
+ fn next(&mut self) -> Option<u16> { self.encoder.next() }
+
+ #[inline]
+ fn size_hint(&self) -> (uint, Option<uint>) { self.encoder.size_hint() }
+}
+
/// Replaces all occurrences of one string with another.
///
/// # Arguments
/// let new_string = str::replace(string, "or", "str");
/// assert_eq!(new_string.as_slice(), "strange");
/// ```
+#[deprecated = "call the inherent method instead"]
pub fn replace(s: &str, from: &str, to: &str) -> String {
- let mut result = String::new();
- let mut last_end = 0;
- for (start, end) in s.match_indices(from) {
- result.push_str(unsafe { s.slice_unchecked(last_end, start) });
- result.push_str(to);
- last_end = end;
- }
- result.push_str(unsafe { s.slice_unchecked(last_end, s.len()) });
- result
+ s.replace(from, to)
}
/*
/// A string type that can hold either a `String` or a `&str`.
/// This can be useful as an optimization when an allocation is sometimes
/// needed but not always.
-#[deprecated = "use std::str::CowString"]
+#[deprecated = "use std::string::CowString"]
pub enum MaybeOwned<'a> {
/// A borrowed string.
Slice(&'a str),
}
/// A specialization of `CowString` to be sendable.
+#[deprecated = "use std::string::CowString<'static>"]
pub type SendStr = CowString<'static>;
-#[deprecated = "use std::str::CowString"]
+#[deprecated = "use std::string::CowString"]
impl<'a> MaybeOwned<'a> {
/// Returns `true` if this `MaybeOwned` wraps an owned string.
///
/// Return the number of bytes in this string.
#[inline]
+ #[allow(deprecated)]
pub fn len(&self) -> uint { self.as_slice().len() }
/// Returns true if the string contains no bytes
fn into_maybe_owned(self) -> MaybeOwned<'a> { self }
}
-#[deprecated = "use std::str::CowString"]
+#[deprecated = "use std::string::CowString"]
+#[allow(deprecated)]
impl<'a> PartialEq for MaybeOwned<'a> {
#[inline]
fn eq(&self, other: &MaybeOwned) -> bool {
}
}
-#[deprecated = "use std::str::CowString"]
+#[deprecated = "use std::string::CowString"]
impl<'a> Eq for MaybeOwned<'a> {}
-#[deprecated = "use std::str::CowString"]
+#[deprecated = "use std::string::CowString"]
impl<'a> PartialOrd for MaybeOwned<'a> {
#[inline]
fn partial_cmp(&self, other: &MaybeOwned) -> Option<Ordering> {
}
}
-#[deprecated = "use std::str::CowString"]
+#[deprecated = "use std::string::CowString"]
impl<'a> Ord for MaybeOwned<'a> {
#[inline]
+ #[allow(deprecated)]
fn cmp(&self, other: &MaybeOwned) -> Ordering {
self.as_slice().cmp(other.as_slice())
}
}
#[allow(deprecated)]
-#[deprecated = "use std::str::CowString"]
+#[deprecated = "use std::string::CowString"]
impl<'a, S: Str> Equiv<S> for MaybeOwned<'a> {
#[inline]
fn equiv(&self, other: &S) -> bool {
}
}
-#[deprecated = "use std::str::CowString"]
+#[deprecated = "use std::string::CowString"]
+#[allow(deprecated)]
impl<'a> Str for MaybeOwned<'a> {
- #[allow(deprecated)]
#[inline]
fn as_slice<'b>(&'b self) -> &'b str {
match *self {
}
}
-#[deprecated = "use std::str::CowString"]
-impl<'a> StrAllocating for MaybeOwned<'a> {
- #[allow(deprecated)]
- #[inline]
- fn into_string(self) -> String {
- match self {
- Slice(s) => String::from_str(s),
- Owned(s) => s
- }
- }
-}
-
-#[deprecated = "use std::str::CowString"]
+#[deprecated = "use std::string::CowString"]
impl<'a> Clone for MaybeOwned<'a> {
#[allow(deprecated)]
#[inline]
}
}
-#[deprecated = "use std::str::CowString"]
+#[deprecated = "use std::string::CowString"]
impl<'a> Default for MaybeOwned<'a> {
#[allow(deprecated)]
#[inline]
fn default() -> MaybeOwned<'a> { Slice("") }
}
-#[deprecated = "use std::str::CowString"]
+#[deprecated = "use std::string::CowString"]
+#[allow(deprecated)]
impl<'a, H: hash::Writer> hash::Hash<H> for MaybeOwned<'a> {
#[inline]
fn hash(&self, hasher: &mut H) {
}
}
-#[deprecated = "use std::str::CowString"]
+#[deprecated = "use std::string::CowString"]
impl<'a> fmt::Show for MaybeOwned<'a> {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
#[unstable = "trait is unstable"]
impl ToOwned<String> for str {
- fn to_owned(&self) -> String { self.into_string() }
+ fn to_owned(&self) -> String { self.to_string() }
}
/// Unsafe string operations.
+#[deprecated]
pub mod raw {
pub use core::str::raw::{from_utf8, c_str_to_static_slice, slice_bytes};
pub use core::str::raw::{slice_unchecked};
*/
/// A clone-on-write string
+#[deprecated = "use std::string::CowString instead"]
pub type CowString<'a> = Cow<'a, String, str>;
-impl<'a> Str for CowString<'a> {
- #[inline]
- fn as_slice<'b>(&'b self) -> &'b str {
- (**self).as_slice()
- }
-}
-
/*
Section: Trait implementations
*/
/// Any string that can be represented as a slice.
-pub trait StrAllocating: Str {
- /// Converts `self` into a `String`, not making a copy if possible.
- fn into_string(self) -> String;
-
+pub trait StrExt for Sized?: Slice<uint, str> {
/// Escapes each char in `s` with `char::escape_default`.
+ #[unstable = "return type may change to be an iterator"]
fn escape_default(&self) -> String {
- let me = self.as_slice();
- let mut out = String::with_capacity(me.len());
- for c in me.chars() {
- for c in c.escape_default() {
- out.push(c);
- }
- }
- out
+ self.chars().flat_map(|c| c.escape_default()).collect()
}
/// Escapes each char in `s` with `char::escape_unicode`.
+ #[unstable = "return type may change to be an iterator"]
fn escape_unicode(&self) -> String {
- let me = self.as_slice();
- let mut out = String::with_capacity(me.len());
- for c in me.chars() {
- for c in c.escape_unicode() {
- out.push(c);
- }
- }
- out
+ self.chars().flat_map(|c| c.escape_unicode()).collect()
}
/// Replaces all occurrences of one string with another.
/// // not found, so no change.
/// assert_eq!(s.replace("cookie monster", "little lamb"), s);
/// ```
+ #[unstable = "awaiting pattern/matcher stabilization"]
fn replace(&self, from: &str, to: &str) -> String {
- replace(self.as_slice(), from, to)
+ let mut result = String::new();
+ let mut last_end = 0;
+ for (start, end) in self.match_indices(from) {
+ result.push_str(unsafe { self.slice_unchecked(last_end, start) });
+ result.push_str(to);
+ last_end = end;
+ }
+ result.push_str(unsafe { self.slice_unchecked(last_end, self.len()) });
+ result
}
/// Given a string, makes a new string with repeated copies of it.
+ #[deprecated = "use repeat(self).take(n).collect() instead"]
fn repeat(&self, nn: uint) -> String {
- let me = self.as_slice();
- let mut ret = String::with_capacity(nn * me.len());
- for _ in range(0, nn) {
- ret.push_str(me);
- }
- ret
+ // Yield the string slice `nn` times and collect the copies into one `String`.
+ iter::repeat(self[]).take(nn).collect()
}
/// Returns the Levenshtein Distance between two strings.
+ #[deprecated = "this function will be removed"]
fn lev_distance(&self, t: &str) -> uint {
- let me = self.as_slice();
- if me.is_empty() { return t.char_len(); }
- if t.is_empty() { return me.char_len(); }
+ let me = self[];
+ if me.is_empty() { return t.chars().count(); }
+ if t.is_empty() { return me.chars().count(); }
let mut dcol = Vec::from_fn(t.len() + 1, |x| x);
let mut t_last = 0;
/// Returns an iterator over the string in Unicode Normalization Form D
/// (canonical decomposition).
#[inline]
+ #[unstable = "this functionality may only be provided by libunicode"]
fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
Decompositions {
- iter: self.as_slice().chars(),
+ iter: self[].chars(),
buffer: Vec::new(),
sorted: false,
kind: Canonical
/// Returns an iterator over the string in Unicode Normalization Form KD
/// (compatibility decomposition).
#[inline]
+ #[unstable = "this functionality may only be provided by libunicode"]
fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
Decompositions {
- iter: self.as_slice().chars(),
+ iter: self[].chars(),
buffer: Vec::new(),
sorted: false,
kind: Compatible
/// An Iterator over the string in Unicode Normalization Form C
/// (canonical decomposition followed by canonical composition).
#[inline]
+ #[unstable = "this functionality may only be provided by libunicode"]
fn nfc_chars<'a>(&'a self) -> Recompositions<'a> {
Recompositions {
iter: self.nfd_chars(),
/// An Iterator over the string in Unicode Normalization Form KC
/// (compatibility decomposition followed by canonical composition).
#[inline]
+ #[unstable = "this functionality may only be provided by libunicode"]
fn nfkc_chars<'a>(&'a self) -> Recompositions<'a> {
Recompositions {
iter: self.nfkd_chars(),
last_ccc: None
}
}
-}
-impl<'a> StrAllocating for &'a str {
+ /// Returns true if one string contains another
+ ///
+ /// # Arguments
+ ///
+ /// - needle - The string to look for
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// assert!("bananas".contains("nana"));
+ /// ```
+ #[unstable = "awaiting pattern/matcher stabilization"]
+ fn contains(&self, needle: &str) -> bool {
+ core_str::StrExt::contains(self[], needle)
+ }
+
+ /// Returns true if a string contains a char.
+ ///
+ /// # Arguments
+ ///
+ /// - needle - The char to look for
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// assert!("hello".contains_char('e'));
+ /// ```
+ #[unstable = "awaiting pattern/matcher stabilization"]
+ fn contains_char(&self, needle: char) -> bool {
+ core_str::StrExt::contains_char(self[], needle)
+ }
+
+ /// An iterator over the characters of `self`. Note, this iterates
+ /// over Unicode code-points, not Unicode graphemes.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// let v: Vec<char> = "abc åäö".chars().collect();
+ /// assert_eq!(v, vec!['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
+ /// ```
+ #[stable]
+ fn chars(&self) -> Chars {
+ core_str::StrExt::chars(self[])
+ }
+
+ /// An iterator over the bytes of `self`
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// let v: Vec<u8> = "bors".bytes().collect();
+ /// assert_eq!(v, b"bors".to_vec());
+ /// ```
+ #[stable]
+ fn bytes(&self) -> Bytes {
+ core_str::StrExt::bytes(self[])
+ }
+
+ /// An iterator over the characters of `self` and their byte offsets.
+ #[stable]
+ fn char_indices(&self) -> CharIndices {
+ core_str::StrExt::char_indices(self[])
+ }
+
+ /// An iterator over substrings of `self`, separated by characters
+ /// matched by `sep`.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect();
+ /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
+ ///
+ /// let v: Vec<&str> = "abc1def2ghi".split(|c: char| c.is_numeric()).collect();
+ /// assert_eq!(v, vec!["abc", "def", "ghi"]);
+ ///
+ /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
+ /// assert_eq!(v, vec!["lion", "", "tiger", "leopard"]);
+ ///
+ /// let v: Vec<&str> = "".split('X').collect();
+ /// assert_eq!(v, vec![""]);
+ /// ```
+ #[unstable = "awaiting pattern/matcher stabilization"]
+ fn split<Sep: CharEq>(&self, sep: Sep) -> CharSplits<Sep> {
+ core_str::StrExt::split(self[], sep)
+ }
+
+ /// An iterator over substrings of `self`, separated by characters
+ /// matched by `sep`, restricted to splitting at most `count`
+ /// times.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// let v: Vec<&str> = "Mary had a little lambda".splitn(2, ' ').collect();
+ /// assert_eq!(v, vec!["Mary", "had", "a little lambda"]);
+ ///
+ /// let v: Vec<&str> = "abc1def2ghi".splitn(1, |c: char| c.is_numeric()).collect();
+ /// assert_eq!(v, vec!["abc", "def2ghi"]);
+ ///
+ /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(2, 'X').collect();
+ /// assert_eq!(v, vec!["lion", "", "tigerXleopard"]);
+ ///
+ /// let v: Vec<&str> = "abcXdef".splitn(0, 'X').collect();
+ /// assert_eq!(v, vec!["abcXdef"]);
+ ///
+ /// let v: Vec<&str> = "".splitn(1, 'X').collect();
+ /// assert_eq!(v, vec![""]);
+ /// ```
+ #[unstable = "awaiting pattern/matcher stabilization"]
+ fn splitn<Sep: CharEq>(&self, count: uint, sep: Sep) -> CharSplitsN<Sep> {
+ core_str::StrExt::splitn(self[], count, sep)
+ }
+
+ /// An iterator over substrings of `self`, separated by characters
+ /// matched by `sep`.
+ ///
+ /// Equivalent to `split`, except that the trailing substring
+ /// is skipped if empty (terminator semantics).
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
+ /// assert_eq!(v, vec!["A", "B"]);
+ ///
+ /// let v: Vec<&str> = "A..B..".split_terminator('.').collect();
+ /// assert_eq!(v, vec!["A", "", "B", ""]);
+ ///
+ /// let v: Vec<&str> = "Mary had a little lamb".split(' ').rev().collect();
+ /// assert_eq!(v, vec!["lamb", "little", "a", "had", "Mary"]);
+ ///
+ /// let v: Vec<&str> = "abc1def2ghi".split(|c: char| c.is_numeric()).rev().collect();
+ /// assert_eq!(v, vec!["ghi", "def", "abc"]);
+ ///
+ /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').rev().collect();
+ /// assert_eq!(v, vec!["leopard", "tiger", "", "lion"]);
+ /// ```
+ #[unstable = "awaiting pattern/matcher stabilization"]
+ fn split_terminator<Sep: CharEq>(&self, sep: Sep) -> CharSplits<Sep> {
+ core_str::StrExt::split_terminator(self[], sep)
+ }
+
+ /// An iterator over substrings of `self`, separated by characters
+ /// matched by `sep`, starting from the end of the string.
+ /// Restricted to splitting at most `count` times.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(2, ' ').collect();
+ /// assert_eq!(v, vec!["lamb", "little", "Mary had a"]);
+ ///
+ /// let v: Vec<&str> = "abc1def2ghi".rsplitn(1, |c: char| c.is_numeric()).collect();
+ /// assert_eq!(v, vec!["ghi", "abc1def"]);
+ ///
+ /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(2, 'X').collect();
+ /// assert_eq!(v, vec!["leopard", "tiger", "lionX"]);
+ /// ```
+ #[unstable = "awaiting pattern/matcher stabilization"]
+ fn rsplitn<Sep: CharEq>(&self, count: uint, sep: Sep) -> CharSplitsN<Sep> {
+ core_str::StrExt::rsplitn(self[], count, sep)
+ }
+
+ /// An iterator over the start and end indices of the disjoint
+ /// matches of `sep` within `self`.
+ ///
+ /// That is, each returned value `(start, end)` satisfies
+ /// `self.slice(start, end) == sep`. For matches of `sep` within
+ /// `self` that overlap, only the indices corresponding to the
+ /// first match are returned.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// let v: Vec<(uint, uint)> = "abcXXXabcYYYabc".match_indices("abc").collect();
+ /// assert_eq!(v, vec![(0,3), (6,9), (12,15)]);
+ ///
+ /// let v: Vec<(uint, uint)> = "1abcabc2".match_indices("abc").collect();
+ /// assert_eq!(v, vec![(1,4), (4,7)]);
+ ///
+ /// let v: Vec<(uint, uint)> = "ababa".match_indices("aba").collect();
+ /// assert_eq!(v, vec![(0, 3)]); // only the first `aba`
+ /// ```
+ #[unstable = "awaiting pattern/matcher stabilization"]
+ fn match_indices<'a>(&'a self, sep: &'a str) -> MatchIndices<'a> {
+ core_str::StrExt::match_indices(self[], sep)
+ }
+
+ /// An iterator over the substrings of `self` separated by `sep`.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// let v: Vec<&str> = "abcXXXabcYYYabc".split_str("abc").collect();
+ /// assert_eq!(v, vec!["", "XXX", "YYY", ""]);
+ ///
+ /// let v: Vec<&str> = "1abcabc2".split_str("abc").collect();
+ /// assert_eq!(v, vec!["1", "", "2"]);
+ /// ```
+ #[unstable = "awaiting pattern/matcher stabilization"]
+ fn split_str<'a>(&'a self, s: &'a str) -> StrSplits<'a> {
+ core_str::StrExt::split_str(self[], s)
+ }
+
+ /// An iterator over the lines of a string (subsequences separated
+ /// by `\n`). This does not include the empty string after a
+ /// trailing `\n`.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// let four_lines = "foo\nbar\n\nbaz\n";
+ /// let v: Vec<&str> = four_lines.lines().collect();
+ /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
+ /// ```
+ #[stable]
+ fn lines(&self) -> Lines {
+ core_str::StrExt::lines(self[])
+ }
+
+ /// An iterator over the lines of a string, separated by either
+ /// `\n` or `\r\n`. As with `.lines()`, this does not include an
+ /// empty trailing line.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// let four_lines = "foo\r\nbar\n\r\nbaz\n";
+ /// let v: Vec<&str> = four_lines.lines_any().collect();
+ /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
+ /// ```
+ #[stable]
+ fn lines_any(&self) -> LinesAny {
+ core_str::StrExt::lines_any(self[])
+ }
+
+ /// Returns the number of Unicode code points (`char`) that a
+ /// string holds.
+ ///
+ /// This does not perform any normalization, and is `O(n)`, since
+ /// UTF-8 is a variable width encoding of code points.
+ ///
+ /// *Warning*: The number of code points in a string does not directly
+ /// correspond to the number of visible characters or width of the
+ /// visible text due to composing characters, and double- and
+ /// zero-width ones.
+ ///
+ /// See also `.len()` for the byte length.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// // composed forms of `ö` and `é`
+ /// let c = "Löwe 老虎 Léopard"; // German, Simplified Chinese, French
+ /// // decomposed forms of `ö` and `é`
+ /// let d = "Lo\u0308we 老虎 Le\u0301opard";
+ ///
+ /// assert_eq!(c.char_len(), 15);
+ /// assert_eq!(d.char_len(), 17);
+ ///
+ /// assert_eq!(c.len(), 21);
+ /// assert_eq!(d.len(), 23);
+ ///
+ /// // the two strings *look* the same
+ /// println!("{}", c);
+ /// println!("{}", d);
+ /// ```
+ #[deprecated = "call .chars().count() instead"]
+ fn char_len(&self) -> uint {
+ core_str::StrExt::char_len(self[])
+ }
+
+ /// Returns a slice of the given string from the byte range
+ /// [`begin`..`end`).
+ ///
+ /// This operation is `O(1)`.
+ ///
+ /// Panics when `begin` and `end` do not point to valid characters
+ /// or point beyond the last character of the string.
+ ///
+ /// See also `slice_to` and `slice_from` for slicing prefixes and
+ /// suffixes of strings, and `slice_chars` for slicing based on
+ /// code point counts.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// let s = "Löwe 老虎 Léopard";
+ /// assert_eq!(s.slice(0, 1), "L");
+ ///
+ /// assert_eq!(s.slice(1, 9), "öwe 老");
+ ///
+ /// // these will panic:
+ /// // byte 2 lies within `ö`:
+ /// // s.slice(2, 3);
+ ///
+ /// // byte 8 lies within `老`
+ /// // s.slice(1, 8);
+ ///
+ /// // byte 100 is outside the string
+ /// // s.slice(3, 100);
+ /// ```
+ #[unstable = "use slice notation [a..b] instead"]
+ fn slice(&self, begin: uint, end: uint) -> &str {
+ core_str::StrExt::slice(self[], begin, end)
+ }
+
+ /// Returns a slice of the string from `begin` to its end.
+ ///
+ /// Equivalent to `self.slice(begin, self.len())`.
+ ///
+ /// Panics when `begin` does not point to a valid character, or is
+ /// out of bounds.
+ ///
+ /// See also `slice`, `slice_to` and `slice_chars`.
+ #[unstable = "use slice notation [a..] instead"]
+ fn slice_from(&self, begin: uint) -> &str {
+ core_str::StrExt::slice_from(self[], begin)
+ }
+
+ /// Returns a slice of the string from the beginning to byte
+ /// `end`.
+ ///
+ /// Equivalent to `self.slice(0, end)`.
+ ///
+ /// Panics when `end` does not point to a valid character, or is
+ /// out of bounds.
+ ///
+ /// See also `slice`, `slice_from` and `slice_chars`.
+ #[unstable = "use slice notation [0..a] instead"]
+ fn slice_to(&self, end: uint) -> &str {
+ core_str::StrExt::slice_to(self[], end)
+ }
+
+ /// Returns a slice of the string from the character range
+ /// [`begin`..`end`).
+ ///
+ /// That is, start at the `begin`-th code point of the string and
+ /// continue to the `end`-th code point. This does not detect or
+ /// handle edge cases such as leaving a combining character as the
+ /// first code point of the string.
+ ///
+ /// Due to the design of UTF-8, this operation is `O(end)`.
+ /// See `slice`, `slice_to` and `slice_from` for `O(1)`
+ /// variants that use byte indices rather than code point
+ /// indices.
+ ///
+ /// Panics if `begin` > `end` or if either `begin` or `end` is
+ /// beyond the last character of the string.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// let s = "Löwe 老虎 Léopard";
+ /// assert_eq!(s.slice_chars(0, 4), "Löwe");
+ /// assert_eq!(s.slice_chars(5, 7), "老虎");
+ /// ```
+ #[unstable = "may have yet to prove its worth"]
+ fn slice_chars(&self, begin: uint, end: uint) -> &str {
+ core_str::StrExt::slice_chars(self[], begin, end)
+ }
+
+ /// Takes a bytewise (not UTF-8) slice from a string.
+ ///
+ /// Returns the substring from [`begin`..`end`).
+ ///
+ /// Caller must check both UTF-8 character boundaries and the boundaries of
+ /// the entire slice as well.
+ #[stable]
+ unsafe fn slice_unchecked(&self, begin: uint, end: uint) -> &str {
+ core_str::StrExt::slice_unchecked(self[], begin, end)
+ }
+
+ /// Returns true if `needle` is a prefix of the string.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// assert!("banana".starts_with("ba"));
+ /// ```
+ #[unstable = "awaiting pattern/matcher stabilization"]
+ fn starts_with(&self, needle: &str) -> bool {
+ core_str::StrExt::starts_with(self[], needle)
+ }
+
+ /// Returns true if `needle` is a suffix of the string.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// assert!("banana".ends_with("nana"));
+ /// ```
+ #[unstable = "awaiting pattern/matcher stabilization"]
+ fn ends_with(&self, needle: &str) -> bool {
+ core_str::StrExt::ends_with(self[], needle)
+ }
+
+ /// Returns a string with characters that match `to_trim` removed from the left and the right.
+ ///
+ /// # Arguments
+ ///
+ /// * to_trim - a character matcher
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar");
+ /// let x: &[_] = &['1', '2'];
+ /// assert_eq!("12foo1bar12".trim_chars(x), "foo1bar");
+ /// assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_numeric()), "foo1bar");
+ /// ```
+ #[unstable = "awaiting pattern/matcher stabilization"]
+ fn trim_chars<C: CharEq>(&self, to_trim: C) -> &str {
+ core_str::StrExt::trim_chars(self[], to_trim)
+ }
+
+ /// Returns a string with leading characters that match `to_trim` removed.
+ ///
+ /// # Arguments
+ ///
+ /// * to_trim - a character matcher
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11");
+ /// let x: &[_] = &['1', '2'];
+ /// assert_eq!("12foo1bar12".trim_left_chars(x), "foo1bar12");
+ /// assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_numeric()), "foo1bar123");
+ /// ```
+ #[unstable = "awaiting pattern/matcher stabilization"]
+ fn trim_left_chars<C: CharEq>(&self, to_trim: C) -> &str {
+ core_str::StrExt::trim_left_chars(self[], to_trim)
+ }
+
+ /// Returns a string with trailing characters that match `to_trim` removed.
+ ///
+ /// # Arguments
+ ///
+ /// * to_trim - a character matcher
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar");
+ /// let x: &[_] = &['1', '2'];
+ /// assert_eq!("12foo1bar12".trim_right_chars(x), "12foo1bar");
+ /// assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_numeric()), "123foo1bar");
+ /// ```
+ #[unstable = "awaiting pattern/matcher stabilization"]
+ fn trim_right_chars<C: CharEq>(&self, to_trim: C) -> &str {
+ core_str::StrExt::trim_right_chars(self[], to_trim)
+ }
+
+ /// Check that `index`-th byte lies at the start and/or end of a
+ /// UTF-8 code point sequence.
+ ///
+ /// The start and end of the string (when `index == self.len()`)
+ /// are considered to be boundaries.
+ ///
+ /// Panics if `index` is greater than `self.len()`.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// let s = "Löwe 老虎 Léopard";
+ /// assert!(s.is_char_boundary(0));
+ /// // start of `老`
+ /// assert!(s.is_char_boundary(6));
+ /// assert!(s.is_char_boundary(s.len()));
+ ///
+ /// // second byte of `ö`
+ /// assert!(!s.is_char_boundary(2));
+ ///
+ /// // third byte of `老`
+ /// assert!(!s.is_char_boundary(8));
+ /// ```
+ #[unstable = "naming is uncertain with container conventions"]
+ fn is_char_boundary(&self, index: uint) -> bool {
+ core_str::StrExt::is_char_boundary(self[], index)
+ }
+
+ /// Pluck a character out of a string and return the index of the next
+ /// character.
+ ///
+ /// This function can be used to iterate over the Unicode characters of a
+ /// string.
+ ///
+ /// # Example
+ ///
+ /// This example manually iterates through the characters of a
+ /// string; this should normally be done by `.chars()` or
+ /// `.char_indices()`.
+ ///
+ /// ```rust
+ /// use std::str::CharRange;
+ ///
+ /// let s = "中华Việt Nam";
+ /// let mut i = 0u;
+ /// while i < s.len() {
+ /// let CharRange {ch, next} = s.char_range_at(i);
+ /// println!("{}: {}", i, ch);
+ /// i = next;
+ /// }
+ /// ```
+ ///
+ /// This outputs:
+ ///
+ /// ```text
+ /// 0: 中
+ /// 3: 华
+ /// 6: V
+ /// 7: i
+ /// 8: ệ
+ /// 11: t
+ /// 12:
+ /// 13: N
+ /// 14: a
+ /// 15: m
+ /// ```
+ ///
+ /// # Arguments
+ ///
+ /// * self - The string
+ /// * start - The byte offset of the char to extract
+ ///
+ /// # Return value
+ ///
+ /// A `CharRange {ch: char, next: uint}` containing the char value and the byte
+ /// index of the next Unicode character.
+ ///
+ /// # Panics
+ ///
+ /// If `start` is greater than or equal to the length of the string.
+ /// If `start` is not the index of the beginning of a valid UTF-8 character.
+ #[unstable = "naming is uncertain with container conventions"]
+ fn char_range_at(&self, start: uint) -> CharRange {
+ core_str::StrExt::char_range_at(self[], start)
+ }
+
+ /// Given a byte position and a str, return the previous char and its position.
+ ///
+ /// This function can be used to iterate over a Unicode string in reverse.
+ ///
+ /// Returns 0 for next index if called on start index 0.
+ ///
+ /// # Panics
+ ///
+ /// If `start` is greater than the length of the string.
+ /// If `start` is not an index following a valid UTF-8 character.
+ #[unstable = "naming is uncertain with container conventions"]
+ fn char_range_at_reverse(&self, start: uint) -> CharRange {
+ core_str::StrExt::char_range_at_reverse(self[], start)
+ }
+
+ /// Plucks the character starting at the `i`th byte of a string.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// let s = "abπc";
+ /// assert_eq!(s.char_at(1), 'b');
+ /// assert_eq!(s.char_at(2), 'π');
+ /// assert_eq!(s.char_at(4), 'c');
+ /// ```
+ ///
+ /// # Panics
+ ///
+ /// If `i` is greater than or equal to the length of the string.
+ /// If `i` is not the index of the beginning of a valid UTF-8 character.
+ #[unstable = "naming is uncertain with container conventions"]
+ fn char_at(&self, i: uint) -> char {
+ core_str::StrExt::char_at(self[], i)
+ }
+
+ /// Plucks the character ending at the `i`th byte of a string.
+ ///
+ /// # Panics
+ ///
+ /// If `i` is greater than the length of the string.
+ /// If `i` is not an index following a valid UTF-8 character.
+ #[unstable = "naming is uncertain with container conventions"]
+ fn char_at_reverse(&self, i: uint) -> char {
+ core_str::StrExt::char_at_reverse(self[], i)
+ }
+
+ /// Work with the byte buffer of a string as a byte slice.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// assert_eq!("bors".as_bytes(), b"bors");
+ /// ```
+ #[stable]
+ fn as_bytes(&self) -> &[u8] {
+ core_str::StrExt::as_bytes(self[])
+ }
+
+ /// Returns the byte index of the first character of `self` that
+ /// matches `search`.
+ ///
+ /// # Return value
+ ///
+ /// `Some` containing the byte index of the first matching character
+ /// or `None` if there is no match.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// let s = "Löwe 老虎 Léopard";
+ ///
+ /// assert_eq!(s.find('L'), Some(0));
+ /// assert_eq!(s.find('é'), Some(14));
+ ///
+ /// // the first space
+ /// assert_eq!(s.find(|c: char| c.is_whitespace()), Some(5));
+ ///
+ /// // neither are found
+ /// let x: &[_] = &['1', '2'];
+ /// assert_eq!(s.find(x), None);
+ /// ```
+ #[unstable = "awaiting pattern/matcher stabilization"]
+ fn find<C: CharEq>(&self, search: C) -> Option<uint> {
+ core_str::StrExt::find(self[], search)
+ }
+
+ /// Returns the byte index of the last character of `self` that
+ /// matches `search`.
+ ///
+ /// # Return value
+ ///
+ /// `Some` containing the byte index of the last matching character
+ /// or `None` if there is no match.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// let s = "Löwe 老虎 Léopard";
+ ///
+ /// assert_eq!(s.rfind('L'), Some(13));
+ /// assert_eq!(s.rfind('é'), Some(14));
+ ///
+ /// // the second space
+ /// assert_eq!(s.rfind(|c: char| c.is_whitespace()), Some(12));
+ ///
+ /// // searches for an occurrence of either `1` or `2`, but neither are found
+ /// let x: &[_] = &['1', '2'];
+ /// assert_eq!(s.rfind(x), None);
+ /// ```
+ #[unstable = "awaiting pattern/matcher stabilization"]
+ fn rfind<C: CharEq>(&self, search: C) -> Option<uint> {
+ core_str::StrExt::rfind(self[], search)
+ }
+
+ /// Returns the byte index of the first matching substring
+ ///
+ /// # Arguments
+ ///
+ /// * `needle` - The string to search for
+ ///
+ /// # Return value
+ ///
+ /// `Some` containing the byte index of the first matching substring
+ /// or `None` if there is no match.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// let s = "Löwe 老虎 Léopard";
+ ///
+ /// assert_eq!(s.find_str("老虎 L"), Some(6));
+ /// assert_eq!(s.find_str("muffin man"), None);
+ /// ```
+ #[unstable = "awaiting pattern/matcher stabilization"]
+ fn find_str(&self, needle: &str) -> Option<uint> {
+ core_str::StrExt::find_str(self[], needle)
+ }
+
+ /// Retrieves the first character from a string slice and returns
+ /// it. This does not allocate a new string; instead, it returns a
+ /// slice that points one character beyond the character that was
+ /// shifted. If the string does not contain any characters,
+ /// `None` is returned instead.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// let s = "Löwe 老虎 Léopard";
+ /// let (c, s1) = s.slice_shift_char().unwrap();
+ /// assert_eq!(c, 'L');
+ /// assert_eq!(s1, "öwe 老虎 Léopard");
+ ///
+ /// let (c, s2) = s1.slice_shift_char().unwrap();
+ /// assert_eq!(c, 'ö');
+ /// assert_eq!(s2, "we 老虎 Léopard");
+ /// ```
+ #[unstable = "awaiting conventions about shifting and slices"]
+ fn slice_shift_char(&self) -> Option<(char, &str)> {
+ core_str::StrExt::slice_shift_char(self[])
+ }
+
+ /// Returns the byte offset of an inner slice relative to an enclosing outer slice.
+ ///
+ /// Panics if `inner` is not a direct slice contained within self.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// let string = "a\nb\nc";
+ /// let lines: Vec<&str> = string.lines().collect();
+ ///
+ /// assert!(string.subslice_offset(lines[0]) == 0); // &"a"
+ /// assert!(string.subslice_offset(lines[1]) == 2); // &"b"
+ /// assert!(string.subslice_offset(lines[2]) == 4); // &"c"
+ /// ```
+ #[unstable = "awaiting pattern/matcher stabilization"]
+ fn subslice_offset(&self, inner: &str) -> uint {
+ core_str::StrExt::subslice_offset(self[], inner)
+ }
+
+ /// Return an unsafe pointer to the string's buffer.
+ ///
+ /// The caller must ensure that the string outlives this pointer,
+ /// and that it is not reallocated (e.g. by pushing to the
+ /// string).
+ #[stable]
#[inline]
- fn into_string(self) -> String {
- String::from_str(self)
+ fn as_ptr(&self) -> *const u8 {
+ core_str::StrExt::as_ptr(self[])
+ }
+
+ /// Return an iterator of `u16` over the string encoded as UTF-16.
+ #[unstable = "this functionality may only be provided by libunicode"]
+ fn utf16_units(&self) -> Utf16Units {
+ Utf16Units { encoder: Utf16Encoder::new(self[].chars()) }
+ }
+
+ /// Return the number of bytes in this string
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// assert_eq!("foo".len(), 3);
+ /// assert_eq!("ƒoo".len(), 4);
+ /// ```
+ #[stable]
+ #[inline]
+ fn len(&self) -> uint {
+ core_str::StrExt::len(self[])
+ }
+
+ /// Returns true if this slice contains no bytes
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// assert!("".is_empty());
+ /// ```
+ #[inline]
+ #[stable]
+ fn is_empty(&self) -> bool {
+ core_str::StrExt::is_empty(self[])
+ }
+
+ /// Parse this string into the specified type.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// assert_eq!("4".parse::<u32>(), Some(4));
+ /// assert_eq!("j".parse::<u32>(), None);
+ /// ```
+ #[inline]
+ #[unstable = "this method was just created"]
+ fn parse<F: FromStr>(&self) -> Option<F> {
+ FromStr::from_str(self[])
+ }
+
+ /// Returns an iterator over the
+ /// [grapheme clusters](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
+ /// of the string.
+ ///
+ /// If `is_extended` is true, the iterator is over the *extended grapheme clusters*;
+ /// otherwise, the iterator is over the *legacy grapheme clusters*.
+ /// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
+ /// recommends extended grapheme cluster boundaries for general processing.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// let gr1 = "a\u0310e\u0301o\u0308\u0332".graphemes(true).collect::<Vec<&str>>();
+ /// let b: &[_] = &["a\u0310", "e\u0301", "o\u0308\u0332"];
+ /// assert_eq!(gr1.as_slice(), b);
+ /// let gr2 = "a\r\nb🇷🇺🇸🇹".graphemes(true).collect::<Vec<&str>>();
+ /// let b: &[_] = &["a", "\r\n", "b", "🇷🇺🇸🇹"];
+ /// assert_eq!(gr2.as_slice(), b);
+ /// ```
+ #[unstable = "this functionality may only be provided by libunicode"]
+ fn graphemes(&self, is_extended: bool) -> Graphemes {
+ UnicodeStr::graphemes(self[], is_extended)
+ }
+
+ /// Returns an iterator over the grapheme clusters of self and their byte offsets.
+ /// See `graphemes()` method for more information.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// let gr_inds = "a̐éö̲\r\n".grapheme_indices(true).collect::<Vec<(uint, &str)>>();
+ /// let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
+ /// assert_eq!(gr_inds.as_slice(), b);
+ /// ```
+ #[unstable = "this functionality may only be provided by libunicode"]
+ fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices {
+ UnicodeStr::grapheme_indices(self[], is_extended)
+ }
+
+ /// An iterator over the words of a string (subsequences separated
+ /// by any sequence of whitespace). Sequences of whitespace are
+ /// collapsed, so empty "words" are not included.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// let some_words = " Mary had\ta little \n\t lamb";
+ /// let v: Vec<&str> = some_words.words().collect();
+ /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
+ /// ```
+ #[stable]
+ fn words(&self) -> Words {
+ UnicodeStr::words(self[])
+ }
+
+ /// Returns true if the string contains only whitespace.
+ ///
+ /// Whitespace characters are determined by `char::is_whitespace`.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// assert!(" \t\n".is_whitespace());
+ /// assert!("".is_whitespace());
+ ///
+ /// assert!( !"abc".is_whitespace());
+ /// ```
+ #[deprecated = "use .chars().all(|c| c.is_whitespace())"]
+ fn is_whitespace(&self) -> bool {
+ UnicodeStr::is_whitespace(self[])
+ }
+
+ /// Returns true if the string contains only alphanumeric code
+ /// points.
+ ///
+ /// Alphanumeric characters are determined by `char::is_alphanumeric`.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// assert!("Löwe老虎Léopard123".is_alphanumeric());
+ /// assert!("".is_alphanumeric());
+ ///
+ /// assert!( !" &*~".is_alphanumeric());
+ /// ```
+ #[deprecated = "use .chars().all(|c| c.is_alphanumeric())"]
+ fn is_alphanumeric(&self) -> bool {
+ UnicodeStr::is_alphanumeric(self[])
+ }
+
+ /// Returns a string's displayed width in columns, treating control
+ /// characters as zero-width.
+ ///
+ /// `is_cjk` determines behavior for characters in the Ambiguous category:
+ /// if `is_cjk` is `true`, these are 2 columns wide; otherwise, they are 1.
+ /// In CJK locales, `is_cjk` should be `true`, else it should be `false`.
+ /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
+ /// recommends that these characters be treated as 1 column (i.e.,
+ /// `is_cjk` = `false`) if the locale is unknown.
+ #[unstable = "this functionality may only be provided by libunicode"]
+ fn width(&self, is_cjk: bool) -> uint {
+ UnicodeStr::width(self[], is_cjk)
+ }
+
+ /// Returns a string with leading and trailing whitespace removed.
+ #[stable]
+ fn trim(&self) -> &str {
+ UnicodeStr::trim(self[])
+ }
+
+ /// Returns a string with leading whitespace removed.
+ #[stable]
+ fn trim_left(&self) -> &str {
+ UnicodeStr::trim_left(self[])
+ }
+
+ /// Returns a string with trailing whitespace removed.
+ #[stable]
+ fn trim_right(&self) -> &str {
+ UnicodeStr::trim_right(self[])
}
}
+impl StrExt for str {}
+
#[cfg(test)]
mod tests {
use prelude::*;
assert!(!"".contains_char('a'));
}
- #[test]
- fn test_truncate_utf16_at_nul() {
- let v = [];
- let b: &[u16] = &[];
- assert_eq!(truncate_utf16_at_nul(&v), b);
-
- let v = [0, 2, 3];
- assert_eq!(truncate_utf16_at_nul(&v), b);
-
- let v = [1, 0, 3];
- let b: &[u16] = &[1];
- assert_eq!(truncate_utf16_at_nul(&v), b);
-
- let v = [1, 2, 0];
- let b: &[u16] = &[1, 2];
- assert_eq!(truncate_utf16_at_nul(&v), b);
-
- let v = [1, 2, 3];
- let b: &[u16] = &[1, 2, 3];
- assert_eq!(truncate_utf16_at_nul(&v), b);
- }
-
#[test]
fn test_char_at() {
let s = "ศไทย中华Việt Nam";
assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
}
- #[test]
- fn test_lev_distance() {
- use core::char::{ from_u32, MAX };
- // Test bytelength agnosticity
- for c in range(0u32, MAX as u32)
- .filter_map(|i| from_u32(i))
- .map(|i| String::from_char(1, i)) {
- assert_eq!(c[].lev_distance(c[]), 0);
- }
-
- let a = "\nMäry häd ä little lämb\n\nLittle lämb\n";
- let b = "\nMary häd ä little lämb\n\nLittle lämb\n";
- let c = "Mary häd ä little lämb\n\nLittle lämb\n";
- assert_eq!(a.lev_distance(b), 1);
- assert_eq!(b.lev_distance(a), 1);
- assert_eq!(a.lev_distance(c), 2);
- assert_eq!(c.lev_distance(a), 2);
- assert_eq!(b.lev_distance(c), 1);
- assert_eq!(c.lev_distance(b), 1);
- }
-
#[test]
fn test_nfd_chars() {
macro_rules! t {
use core::mem;
use core::ptr;
use core::ops;
-// FIXME: ICE's abound if you import the `Slice` type while importing `Slice` trait
use core::raw::Slice as RawSlice;
+use unicode::str as unicode_str;
+use unicode::str::Utf16Item;
use slice::CloneSliceExt;
-use str;
-use str::{CharRange, CowString, FromStr, StrAllocating};
-use str::MaybeOwned::Owned;
+use str::{mod, CharRange, FromStr, StrExt, Owned, Utf8Error};
use vec::{DerefVec, Vec, as_vec};
/// A growable string stored as a UTF-8 encoded buffer.
/// Returns the vector as a string buffer, if possible, taking care not to
/// copy it.
///
- /// Returns `Err` with the original vector if the vector contains invalid
- /// UTF-8.
+ /// # Failure
+ ///
+ /// If the given vector is not valid UTF-8, then the original vector and the
+ /// corresponding error are returned.
///
/// # Examples
///
/// ```
#[inline]
#[unstable = "error type may change"]
- pub fn from_utf8(vec: Vec<u8>) -> Result<String, Vec<u8>> {
- if str::is_utf8(vec.as_slice()) {
- Ok(String { vec: vec })
- } else {
- Err(vec)
+ pub fn from_utf8(vec: Vec<u8>) -> Result<String, (Vec<u8>, Utf8Error)> {
+ match str::from_utf8(vec.as_slice()) {
+ Ok(..) => Ok(String { vec: vec }),
+ Err(e) => Err((vec, e))
}
}
/// ```
#[unstable = "return type may change"]
pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> CowString<'a> {
- if str::is_utf8(v) {
- return Cow::Borrowed(unsafe { mem::transmute(v) })
+ match str::from_utf8(v) {
+ Ok(s) => return Cow::Borrowed(s),
+ Err(..) => {}
}
static TAG_CONT_U8: u8 = 128u8;
if byte < 128u8 {
// subseqidx handles this
} else {
- let w = str::utf8_char_width(byte);
+ let w = unicode_str::utf8_char_width(byte);
match w {
2 => {
res.as_mut_vec().push_all(v[subseqidx..total])
};
}
- Cow::Owned(res.into_string())
+ Cow::Owned(res)
}
/// Decode a UTF-16 encoded vector `v` into a `String`, returning `None`
#[unstable = "error value in return may change"]
pub fn from_utf16(v: &[u16]) -> Option<String> {
let mut s = String::with_capacity(v.len());
- for c in str::utf16_items(v) {
+ for c in unicode_str::utf16_items(v) {
match c {
- str::ScalarValue(c) => s.push(c),
- str::LoneSurrogate(_) => return None
+ Utf16Item::ScalarValue(c) => s.push(c),
+ Utf16Item::LoneSurrogate(_) => return None
}
}
Some(s)
/// ```
#[stable]
pub fn from_utf16_lossy(v: &[u16]) -> String {
- str::utf16_items(v).map(|c| c.to_char_lossy()).collect()
+ unicode_str::utf16_items(v).map(|c| c.to_char_lossy()).collect()
}
/// Convert a vector of `char`s to a `String`.
}
#[experimental = "waiting on Str stabilization"]
+#[allow(deprecated)]
impl Str for String {
#[inline]
#[stable]
fn as_slice<'a>(&'a self) -> &'a str {
- unsafe {
- mem::transmute(self.vec.as_slice())
- }
- }
-}
-
-#[experimental = "waiting on StrAllocating stabilization"]
-impl StrAllocating for String {
- #[inline]
- fn into_string(self) -> String {
- self
+ unsafe { mem::transmute(self.vec.as_slice()) }
}
}
#[experimental = "waiting on Show stabilization"]
impl fmt::Show for String {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- self.as_slice().fmt(f)
+ // Deref twice (`&String` -> `String` -> `str`) so that formatting is
+ // delegated to the `str` impl. A single deref leaves a `String`
+ // receiver, which resolves back to this very method and recurses
+ // without bound.
+ (**self).fmt(f)
}
}
impl<H: hash::Writer> hash::Hash<H> for String {
#[inline]
fn hash(&self, hasher: &mut H) {
- self.as_slice().hash(hasher)
+ // Deref twice (`&String` -> `String` -> `str`) so that hashing is
+ // delegated to the `str` impl. A single deref leaves a `String`
+ // receiver, which resolves back to this very method and recurses
+ // without bound.
+ (**self).hash(hasher)
}
}
impl ops::Slice<uint, str> for String {
#[inline]
fn as_slice_<'a>(&'a self) -> &'a str {
- self.as_slice()
+ unsafe { mem::transmute(self.vec.as_slice()) }
}
#[inline]
#[experimental = "waiting on Deref stabilization"]
impl ops::Deref<str> for String {
- fn deref<'a>(&'a self) -> &'a str { self.as_slice() }
+ fn deref<'a>(&'a self) -> &'a str {
+ unsafe { mem::transmute(self.vec[]) }
+ }
}
/// Wrapper type providing a `&String` reference via `Deref`.
}
}
+/// A clone-on-write string
+#[stable]
+pub type CowString<'a> = Cow<'a, String, str>;
+
+#[allow(deprecated)]
+impl<'a> Str for CowString<'a> {
+ #[inline]
+ fn as_slice<'b>(&'b self) -> &'b str {
+ (**self).as_slice()
+ }
+}
+
#[cfg(test)]
mod tests {
use prelude::*;
use ops::FnOnce;
use result::Result::Ok;
use slice::{mod, SliceExt};
-use str::StrPrelude;
+use str::StrExt;
/// A flag that specifies whether to use exponential (scientific) notation.
pub enum ExponentFormat {
use result;
use slice::SliceExt;
use slice;
-use str::StrPrelude;
+use str::{StrExt, Utf8Error};
pub use self::num::radix;
pub use self::num::Radix;
}
}
+impl Show for Utf8Error {
+ fn fmt(&self, f: &mut Formatter) -> Result {
+ match *self {
+ Utf8Error::InvalidByte(n) => {
+ write!(f, "invalid utf-8: invalid byte at index {}", n)
+ }
+ Utf8Error::TooShort => {
+ write!(f, "invalid utf-8: byte slice too short")
+ }
+ }
+ }
+}
+
// If you expected tests to be here, look instead at the run-pass/ifmt.rs test,
// it's a lot easier than creating all of the rt::Piece structures here.
use ops::{Not, BitAnd, BitOr, BitXor, Shl, Shr};
use option::Option;
use option::Option::{Some, None};
-use str::{FromStr, from_str, StrPrelude};
+use str::{FromStr, from_str, StrExt};
/// Simultaneous division and remainder
#[inline]
pub use ptr::RawPtr;
pub use result::Result;
pub use result::Result::{Ok, Err};
-pub use str::{Str, StrPrelude};
+pub use str::{Str, StrExt};
pub use tuple::{Tuple1, Tuple2, Tuple3, Tuple4};
pub use tuple::{Tuple5, Tuple6, Tuple7, Tuple8};
pub use tuple::{Tuple9, Tuple10, Tuple11, Tuple12};
#![doc(primitive = "str")]
-pub use self::Utf16Item::*;
-pub use self::Searcher::{Naive, TwoWay, TwoWayLong};
+use self::Searcher::{Naive, TwoWay, TwoWayLong};
-use char::Char;
-use char;
+use char::{mod, Char};
use clone::Clone;
-use cmp::{Eq, mod};
+use cmp::{mod, Eq};
use default::Default;
-use iter::{Map, Iterator, IteratorExt, DoubleEndedIterator};
-use iter::{DoubleEndedIteratorExt, ExactSizeIterator};
use iter::range;
-use kinds::Sized;
+use iter::{DoubleEndedIteratorExt, ExactSizeIterator};
+use iter::{Map, Iterator, IteratorExt, DoubleEndedIterator};
+use kinds::{Copy, Sized};
use mem;
use num::Int;
-use option::Option;
-use option::Option::{None, Some};
use ops::{Fn, FnMut};
+use option::Option::{mod, None, Some};
use ptr::RawPtr;
use raw::{Repr, Slice};
+use result::Result::{mod, Ok, Err};
use slice::{mod, SliceExt};
use uint;
/// A trait to abstract the idea of creating a new instance of a type from a
/// string.
-#[experimental = "might need to return Result"]
+// FIXME(#17307): there should be an `E` associated type for a `Result` return
+#[unstable = "will return a Result once associated types are working"]
pub trait FromStr {
/// Parses a string `s` to return an optional value of this type. If the
/// string is ill-formatted, the None is returned.
}
/// A utility function that just calls FromStr::from_str
+#[deprecated = "call the .parse() method on the string instead"]
pub fn from_str<A: FromStr>(s: &str) -> Option<A> {
FromStr::from_str(s)
}
Section: Creating a string
*/
-/// Converts a slice of bytes to a string slice without performing any allocations.
+/// Errors which can occur when attempting to interpret a byte slice as a `str`.
+pub enum Utf8Error {
+ /// An invalid byte was detected at the byte offset given.
+ ///
+ /// The offset is guaranteed to be in bounds of the slice in question, and
+ /// the byte at the specified offset was the first invalid byte in the
+ /// sequence detected.
+ InvalidByte(uint),
+
+ /// The byte slice was invalid because more bytes were needed but no more
+ /// bytes were available.
+ TooShort,
+}
+
+/// Converts a slice of bytes to a string slice without performing any
+/// allocations.
///
/// Once the slice has been validated as utf-8, it is transmuted in-place and
/// returned as a '&str' instead of a '&[u8]'
///
-/// Returns None if the slice is not utf-8.
-pub fn from_utf8<'a>(v: &'a [u8]) -> Option<&'a str> {
- if is_utf8(v) {
- Some(unsafe { from_utf8_unchecked(v) })
- } else {
- None
- }
+/// # Failure
+///
+/// Returns `Err` if the slice is not utf-8 with a description as to why the
+/// provided slice is not utf-8.
+pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
+ try!(run_utf8_validation_iterator(&mut v.iter()));
+ Ok(unsafe { from_utf8_unchecked(v) })
}
/// Converts a slice of bytes to a string slice without checking
/// that the string contains valid UTF-8.
+#[stable]
pub unsafe fn from_utf8_unchecked<'a>(v: &'a [u8]) -> &'a str {
mem::transmute(v)
}
/// # Panics
///
/// This function will panic if the string pointed to by `s` is not valid UTF-8.
+#[unstable = "may change location based on the outcome of the c_str module"]
pub unsafe fn from_c_str(s: *const i8) -> &'static str {
let s = s as *const u8;
let mut len = 0u;
len += 1u;
}
let v: &'static [u8] = ::mem::transmute(Slice { data: s, len: len });
- from_utf8(v).expect("from_c_str passed invalid utf-8 data")
+ from_utf8(v).ok().expect("from_c_str passed invalid utf-8 data")
}
/// Something that can be used to compare against a character
+#[unstable = "definition may change as pattern-related methods are stabilized"]
pub trait CharEq {
/// Determine if the splitter should split at the given character
fn matches(&mut self, char) -> bool;
/// External iterator for a string's characters and their byte offsets.
/// Use with the `std::iter` module.
#[deriving(Clone)]
-pub struct CharOffsets<'a> {
+pub struct CharIndices<'a> {
front_offset: uint,
iter: Chars<'a>,
}
-impl<'a> Iterator<(uint, char)> for CharOffsets<'a> {
+impl<'a> Iterator<(uint, char)> for CharIndices<'a> {
#[inline]
fn next(&mut self) -> Option<(uint, char)> {
let (pre_len, _) = self.iter.iter.size_hint();
}
}
-impl<'a> DoubleEndedIterator<(uint, char)> for CharOffsets<'a> {
+impl<'a> DoubleEndedIterator<(uint, char)> for CharIndices<'a> {
#[inline]
fn next_back(&mut self) -> Option<(uint, char)> {
match self.iter.next_back() {
/// External iterator for a string's bytes.
/// Use with the `std::iter` module.
-pub type Bytes<'a> = Map<&'a u8, u8, slice::Items<'a, u8>, BytesFn>;
+#[stable]
+pub struct Bytes<'a> {
+ inner: Map<&'a u8, u8, slice::Items<'a, u8>, BytesFn>,
+}
/// A temporary new type wrapper that ensures that the `Bytes` iterator
/// is cloneable.
#[deriving(Copy)]
-#[experimental = "iterator type instability"]
-pub struct BytesFn(fn(&u8) -> u8);
+struct BytesFn(fn(&u8) -> u8);
impl<'a> Fn(&'a u8) -> u8 for BytesFn {
extern "rust-call" fn call(&self, (ptr,): (&'a u8,)) -> u8 {
invert: bool,
}
+/// An iterator over the lines of a string, separated by `\n`.
+#[stable]
+pub struct Lines<'a> {
+ inner: CharSplits<'a, char>,
+}
+
/// An iterator over the lines of a string, separated by either `\n` or (`\r\n`).
-pub type AnyLines<'a> = Map<&'a str, &'a str, CharSplits<'a, char>, fn(&str) -> &str>;
+#[stable]
+pub struct LinesAny<'a> {
+ inner: Map<&'a str, &'a str, Lines<'a>, fn(&str) -> &str>,
+}
impl<'a, Sep> CharSplits<'a, Sep> {
#[inline]
}
}
-/// External iterator for a string's UTF16 codeunits.
-/// Use with the `std::iter` module.
-#[deriving(Clone)]
-pub struct Utf16CodeUnits<'a> {
- encoder: Utf16Encoder<Chars<'a>>
-}
-
-impl<'a> Iterator<u16> for Utf16CodeUnits<'a> {
- #[inline]
- fn next(&mut self) -> Option<u16> { self.encoder.next() }
-
- #[inline]
- fn size_hint(&self) -> (uint, Option<uint>) { self.encoder.size_hint() }
-}
-
-
-/// Iterator adaptor for encoding `char`s to UTF-16.
-#[deriving(Clone)]
-pub struct Utf16Encoder<I> {
- chars: I,
- extra: u16
-}
-
-impl<I> Utf16Encoder<I> {
- /// Create an UTF-16 encoder from any `char` iterator.
- pub fn new(chars: I) -> Utf16Encoder<I> where I: Iterator<char> {
- Utf16Encoder { chars: chars, extra: 0 }
- }
-}
-
-impl<I> Iterator<u16> for Utf16Encoder<I> where I: Iterator<char> {
- #[inline]
- fn next(&mut self) -> Option<u16> {
- if self.extra != 0 {
- let tmp = self.extra;
- self.extra = 0;
- return Some(tmp);
- }
-
- let mut buf = [0u16, ..2];
- self.chars.next().map(|ch| {
- let n = ch.encode_utf16(buf[mut]).unwrap_or(0);
- if n == 2 { self.extra = buf[1]; }
- buf[0]
- })
- }
-
- #[inline]
- fn size_hint(&self) -> (uint, Option<uint>) {
- let (low, high) = self.chars.size_hint();
- // every char gets either one u16 or two u16,
- // so this iterator is between 1 or 2 times as
- // long as the underlying iterator.
- (low, high.and_then(|n| n.checked_mul(2)))
- }
-}
-
/*
Section: Comparing strings
*/
/// to compare &[u8] byte slices that are not necessarily valid UTF-8.
#[lang="str_eq"]
#[inline]
-pub fn eq_slice(a: &str, b: &str) -> bool {
+fn eq_slice(a: &str, b: &str) -> bool {
eq_slice_(a, b)
}
/// `iter` reset such that it is pointing at the first byte in the
/// invalid sequence.
#[inline(always)]
-fn run_utf8_validation_iterator(iter: &mut slice::Items<u8>) -> bool {
+fn run_utf8_validation_iterator(iter: &mut slice::Items<u8>)
+ -> Result<(), Utf8Error> {
+ let whole = iter.as_slice();
loop {
// save the current thing we're pointing at.
let old = *iter;
// restore the iterator we had at the start of this codepoint.
- macro_rules! err ( () => { {*iter = old; return false} });
+ macro_rules! err (() => { {
+ *iter = old;
+ return Err(Utf8Error::InvalidByte(whole.len() - iter.as_slice().len()))
+ } });
macro_rules! next ( () => {
- match iter.next() {
- Some(a) => *a,
- // we needed data, but there was none: error!
- None => err!()
- }
- });
+ match iter.next() {
+ Some(a) => *a,
+ // we needed data, but there was none: error!
+ None => return Err(Utf8Error::TooShort),
+ }
+ });
let first = match iter.next() {
Some(&b) => b,
// we're at the end of the iterator and a codepoint
// boundary at the same time, so this string is valid.
- None => return true
+ None => return Ok(())
};
// ASCII characters are always valid, so only large
// bytes need more examination.
if first >= 128 {
- let w = utf8_char_width(first);
+ let w = UTF8_CHAR_WIDTH[first as uint] as uint;
let second = next!();
// 2-byte encoding is for codepoints \u{0080} to \u{07ff}
// first C2 80 last DF BF
}
/// Determines if a vector of bytes contains valid UTF-8.
+#[deprecated = "call from_utf8 instead"]
pub fn is_utf8(v: &[u8]) -> bool {
- run_utf8_validation_iterator(&mut v.iter())
-}
-
-/// Determines if a vector of `u16` contains valid UTF-16
-pub fn is_utf16(v: &[u16]) -> bool {
- let mut it = v.iter();
- macro_rules! next ( ($ret:expr) => {
- match it.next() { Some(u) => *u, None => return $ret }
- }
- );
- loop {
- let u = next!(true);
-
- match char::from_u32(u as u32) {
- Some(_) => {}
- None => {
- let u2 = next!(false);
- if u < 0xD7FF || u > 0xDBFF ||
- u2 < 0xDC00 || u2 > 0xDFFF { return false; }
- }
- }
- }
-}
-
-/// An iterator that decodes UTF-16 encoded codepoints from a vector
-/// of `u16`s.
-#[deriving(Clone)]
-pub struct Utf16Items<'a> {
- iter: slice::Items<'a, u16>
-}
-/// The possibilities for values decoded from a `u16` stream.
-#[deriving(Copy, PartialEq, Eq, Clone, Show)]
-pub enum Utf16Item {
- /// A valid codepoint.
- ScalarValue(char),
- /// An invalid surrogate without its pair.
- LoneSurrogate(u16)
-}
-
-impl Utf16Item {
- /// Convert `self` to a `char`, taking `LoneSurrogate`s to the
- /// replacement character (U+FFFD).
- #[inline]
- pub fn to_char_lossy(&self) -> char {
- match *self {
- ScalarValue(c) => c,
- LoneSurrogate(_) => '\u{FFFD}'
- }
- }
-}
-
-impl<'a> Iterator<Utf16Item> for Utf16Items<'a> {
- fn next(&mut self) -> Option<Utf16Item> {
- let u = match self.iter.next() {
- Some(u) => *u,
- None => return None
- };
-
- if u < 0xD800 || 0xDFFF < u {
- // not a surrogate
- Some(ScalarValue(unsafe {mem::transmute(u as u32)}))
- } else if u >= 0xDC00 {
- // a trailing surrogate
- Some(LoneSurrogate(u))
- } else {
- // preserve state for rewinding.
- let old = self.iter;
-
- let u2 = match self.iter.next() {
- Some(u2) => *u2,
- // eof
- None => return Some(LoneSurrogate(u))
- };
- if u2 < 0xDC00 || u2 > 0xDFFF {
- // not a trailing surrogate so we're not a valid
- // surrogate pair, so rewind to redecode u2 next time.
- self.iter = old;
- return Some(LoneSurrogate(u))
- }
-
- // all ok, so lets decode it.
- let c = ((u - 0xD800) as u32 << 10 | (u2 - 0xDC00) as u32) + 0x1_0000;
- Some(ScalarValue(unsafe {mem::transmute(c)}))
- }
- }
-
- #[inline]
- fn size_hint(&self) -> (uint, Option<uint>) {
- let (low, high) = self.iter.size_hint();
- // we could be entirely valid surrogates (2 elements per
- // char), or entirely non-surrogates (1 element per char)
- (low / 2, high)
- }
-}
-
-/// Create an iterator over the UTF-16 encoded codepoints in `v`,
-/// returning invalid surrogates as `LoneSurrogate`s.
-///
-/// # Example
-///
-/// ```rust
-/// use std::str;
-/// use std::str::{ScalarValue, LoneSurrogate};
-///
-/// // 𝄞mus<invalid>ic<invalid>
-/// let v = [0xD834, 0xDD1E, 0x006d, 0x0075,
-/// 0x0073, 0xDD1E, 0x0069, 0x0063,
-/// 0xD834];
-///
-/// assert_eq!(str::utf16_items(&v).collect::<Vec<_>>(),
-/// vec![ScalarValue('𝄞'),
-/// ScalarValue('m'), ScalarValue('u'), ScalarValue('s'),
-/// LoneSurrogate(0xDD1E),
-/// ScalarValue('i'), ScalarValue('c'),
-/// LoneSurrogate(0xD834)]);
-/// ```
-pub fn utf16_items<'a>(v: &'a [u16]) -> Utf16Items<'a> {
- Utf16Items { iter : v.iter() }
+ run_utf8_validation_iterator(&mut v.iter()).is_ok()
}
/// Return a slice of `v` ending at (and not including) the first NUL
/// let b: &[_] = &['a' as u16, 'b' as u16];
/// assert_eq!(str::truncate_utf16_at_nul(&v), b);
/// ```
+#[deprecated = "this function will be removed"]
pub fn truncate_utf16_at_nul<'a>(v: &'a [u16]) -> &'a [u16] {
match v.iter().position(|c| *c == 0) {
// don't include the 0
/// Given a first byte, determine how many bytes are in this UTF-8 character
#[inline]
+#[deprecated = "this function has moved to libunicode"]
pub fn utf8_char_width(b: u8) -> uint {
return UTF8_CHAR_WIDTH[b as uint] as uint;
}
/// the next `char` in a string. This can be used as a data structure
/// for iterating over the UTF-8 bytes of a string.
#[deriving(Copy)]
+#[unstable = "naming is uncertain with container conventions"]
pub struct CharRange {
/// Current `char`
pub ch: char,
use ptr::RawPtr;
use raw::Slice;
use slice::SliceExt;
- use str::{is_utf8, StrPrelude};
+ use str::StrExt;
/// Converts a slice of bytes to a string slice without checking
/// that the string contains valid UTF-8.
curr = s.offset(len as int);
}
let v = Slice { data: s, len: len };
- assert!(is_utf8(::mem::transmute(v)));
- ::mem::transmute(v)
+ super::from_utf8(::mem::transmute(v)).unwrap()
}
/// Takes a bytewise (not UTF-8) slice from a string.
use option::Option;
use option::Option::Some;
use ops;
- use str::{Str, StrPrelude, eq_slice};
+ use str::{Str, StrExt, eq_slice};
impl Ord for str {
#[inline]
}
/// Any string that can be represented as a slice
+#[unstable = "Instead of taking this bound generically, this trait will be \
+ replaced with one of slicing syntax, deref coercions, or \
+ a more generic conversion trait"]
pub trait Str for Sized? {
/// Work with `self` as a slice.
fn as_slice<'a>(&'a self) -> &'a str;
}
+#[allow(deprecated)]
impl Str for str {
#[inline]
fn as_slice<'a>(&'a self) -> &'a str { self }
}
+#[allow(deprecated)]
impl<'a, Sized? S> Str for &'a S where S: Str {
#[inline]
fn as_slice(&self) -> &str { Str::as_slice(*self) }
}
/// Methods for string slices
-pub trait StrPrelude for Sized? {
- /// Returns true if one string contains another
- ///
- /// # Arguments
- ///
- /// - needle - The string to look for
- ///
- /// # Example
- ///
- /// ```rust
- /// assert!("bananas".contains("nana"));
- /// ```
- fn contains(&self, needle: &str) -> bool;
+#[allow(missing_docs)]
+pub trait StrExt for Sized? {
+ // NB there are no docs here as they're all located on the StrExt trait in
+ // libcollections, not here.
- /// Returns true if a string contains a char.
- ///
- /// # Arguments
- ///
- /// - needle - The char to look for
- ///
- /// # Example
- ///
- /// ```rust
- /// assert!("hello".contains_char('e'));
- /// ```
+ fn contains(&self, needle: &str) -> bool;
fn contains_char(&self, needle: char) -> bool;
-
- /// An iterator over the characters of `self`. Note, this iterates
- /// over Unicode code-points, not Unicode graphemes.
- ///
- /// # Example
- ///
- /// ```rust
- /// let v: Vec<char> = "abc åäö".chars().collect();
- /// assert_eq!(v, vec!['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
- /// ```
fn chars<'a>(&'a self) -> Chars<'a>;
-
- /// An iterator over the bytes of `self`
- ///
- /// # Example
- ///
- /// ```rust
- /// let v: Vec<u8> = "bors".bytes().collect();
- /// assert_eq!(v, b"bors".to_vec());
- /// ```
fn bytes<'a>(&'a self) -> Bytes<'a>;
-
- /// An iterator over the characters of `self` and their byte offsets.
- fn char_indices<'a>(&'a self) -> CharOffsets<'a>;
-
- /// An iterator over substrings of `self`, separated by characters
- /// matched by `sep`.
- ///
- /// # Example
- ///
- /// ```rust
- /// # #![feature(unboxed_closures)]
- ///
- /// # fn main() {
- /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect();
- /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
- ///
- /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).collect();
- /// assert_eq!(v, vec!["abc", "def", "ghi"]);
- ///
- /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
- /// assert_eq!(v, vec!["lion", "", "tiger", "leopard"]);
- ///
- /// let v: Vec<&str> = "".split('X').collect();
- /// assert_eq!(v, vec![""]);
- /// # }
- /// ```
+ fn char_indices<'a>(&'a self) -> CharIndices<'a>;
fn split<'a, Sep: CharEq>(&'a self, sep: Sep) -> CharSplits<'a, Sep>;
-
- /// An iterator over substrings of `self`, separated by characters
- /// matched by `sep`, restricted to splitting at most `count`
- /// times.
- ///
- /// # Example
- ///
- /// ```rust
- /// # #![feature(unboxed_closures)]
- ///
- /// # fn main() {
- /// let v: Vec<&str> = "Mary had a little lambda".splitn(2, ' ').collect();
- /// assert_eq!(v, vec!["Mary", "had", "a little lambda"]);
- ///
- /// let v: Vec<&str> = "abc1def2ghi".splitn(1, |&: c: char| c.is_numeric()).collect();
- /// assert_eq!(v, vec!["abc", "def2ghi"]);
- ///
- /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(2, 'X').collect();
- /// assert_eq!(v, vec!["lion", "", "tigerXleopard"]);
- ///
- /// let v: Vec<&str> = "abcXdef".splitn(0, 'X').collect();
- /// assert_eq!(v, vec!["abcXdef"]);
- ///
- /// let v: Vec<&str> = "".splitn(1, 'X').collect();
- /// assert_eq!(v, vec![""]);
- /// # }
- /// ```
fn splitn<'a, Sep: CharEq>(&'a self, count: uint, sep: Sep) -> CharSplitsN<'a, Sep>;
-
- /// An iterator over substrings of `self`, separated by characters
- /// matched by `sep`.
- ///
- /// Equivalent to `split`, except that the trailing substring
- /// is skipped if empty (terminator semantics).
- ///
- /// # Example
- ///
- /// ```rust
- /// # #![feature(unboxed_closures)]
- ///
- /// # fn main() {
- /// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
- /// assert_eq!(v, vec!["A", "B"]);
- ///
- /// let v: Vec<&str> = "A..B..".split_terminator('.').collect();
- /// assert_eq!(v, vec!["A", "", "B", ""]);
- ///
- /// let v: Vec<&str> = "Mary had a little lamb".split(' ').rev().collect();
- /// assert_eq!(v, vec!["lamb", "little", "a", "had", "Mary"]);
- ///
- /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).rev().collect();
- /// assert_eq!(v, vec!["ghi", "def", "abc"]);
- ///
- /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').rev().collect();
- /// assert_eq!(v, vec!["leopard", "tiger", "", "lion"]);
- /// # }
- /// ```
fn split_terminator<'a, Sep: CharEq>(&'a self, sep: Sep) -> CharSplits<'a, Sep>;
-
- /// An iterator over substrings of `self`, separated by characters
- /// matched by `sep`, starting from the end of the string.
- /// Restricted to splitting at most `count` times.
- ///
- /// # Example
- ///
- /// ```rust
- /// # #![feature(unboxed_closures)]
- ///
- /// # fn main() {
- /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(2, ' ').collect();
- /// assert_eq!(v, vec!["lamb", "little", "Mary had a"]);
- ///
- /// let v: Vec<&str> = "abc1def2ghi".rsplitn(1, |&: c: char| c.is_numeric()).collect();
- /// assert_eq!(v, vec!["ghi", "abc1def"]);
- ///
- /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(2, 'X').collect();
- /// assert_eq!(v, vec!["leopard", "tiger", "lionX"]);
- /// # }
- /// ```
fn rsplitn<'a, Sep: CharEq>(&'a self, count: uint, sep: Sep) -> CharSplitsN<'a, Sep>;
-
- /// An iterator over the start and end indices of the disjoint
- /// matches of `sep` within `self`.
- ///
- /// That is, each returned value `(start, end)` satisfies
- /// `self.slice(start, end) == sep`. For matches of `sep` within
- /// `self` that overlap, only the indices corresponding to the
- /// first match are returned.
- ///
- /// # Example
- ///
- /// ```rust
- /// let v: Vec<(uint, uint)> = "abcXXXabcYYYabc".match_indices("abc").collect();
- /// assert_eq!(v, vec![(0,3), (6,9), (12,15)]);
- ///
- /// let v: Vec<(uint, uint)> = "1abcabc2".match_indices("abc").collect();
- /// assert_eq!(v, vec![(1,4), (4,7)]);
- ///
- /// let v: Vec<(uint, uint)> = "ababa".match_indices("aba").collect();
- /// assert_eq!(v, vec![(0, 3)]); // only the first `aba`
- /// ```
fn match_indices<'a>(&'a self, sep: &'a str) -> MatchIndices<'a>;
-
- /// An iterator over the substrings of `self` separated by `sep`.
- ///
- /// # Example
- ///
- /// ```rust
- /// let v: Vec<&str> = "abcXXXabcYYYabc".split_str("abc").collect();
- /// assert_eq!(v, vec!["", "XXX", "YYY", ""]);
- ///
- /// let v: Vec<&str> = "1abcabc2".split_str("abc").collect();
- /// assert_eq!(v, vec!["1", "", "2"]);
- /// ```
fn split_str<'a>(&'a self, &'a str) -> StrSplits<'a>;
-
- /// An iterator over the lines of a string (subsequences separated
- /// by `\n`). This does not include the empty string after a
- /// trailing `\n`.
- ///
- /// # Example
- ///
- /// ```rust
- /// let four_lines = "foo\nbar\n\nbaz\n";
- /// let v: Vec<&str> = four_lines.lines().collect();
- /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
- /// ```
- fn lines<'a>(&'a self) -> CharSplits<'a, char>;
-
- /// An iterator over the lines of a string, separated by either
- /// `\n` or `\r\n`. As with `.lines()`, this does not include an
- /// empty trailing line.
- ///
- /// # Example
- ///
- /// ```rust
- /// let four_lines = "foo\r\nbar\n\r\nbaz\n";
- /// let v: Vec<&str> = four_lines.lines_any().collect();
- /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
- /// ```
- fn lines_any<'a>(&'a self) -> AnyLines<'a>;
-
- /// Returns the number of Unicode code points (`char`) that a
- /// string holds.
- ///
- /// This does not perform any normalization, and is `O(n)`, since
- /// UTF-8 is a variable width encoding of code points.
- ///
- /// *Warning*: The number of code points in a string does not directly
- /// correspond to the number of visible characters or width of the
- /// visible text due to composing characters, and double- and
- /// zero-width ones.
- ///
- /// See also `.len()` for the byte length.
- ///
- /// # Example
- ///
- /// ```rust
- /// // composed forms of `ö` and `é`
- /// let c = "Löwe 老虎 Léopard"; // German, Simplified Chinese, French
- /// // decomposed forms of `ö` and `é`
- /// let d = "Lo\u{0308}we 老虎 Le\u{0301}opard";
- ///
- /// assert_eq!(c.char_len(), 15);
- /// assert_eq!(d.char_len(), 17);
- ///
- /// assert_eq!(c.len(), 21);
- /// assert_eq!(d.len(), 23);
- ///
- /// // the two strings *look* the same
- /// println!("{}", c);
- /// println!("{}", d);
- /// ```
+ fn lines<'a>(&'a self) -> Lines<'a>;
+ fn lines_any<'a>(&'a self) -> LinesAny<'a>;
fn char_len(&self) -> uint;
-
- /// Returns a slice of the given string from the byte range
- /// [`begin`..`end`).
- ///
- /// This operation is `O(1)`.
- ///
- /// Panics when `begin` and `end` do not point to valid characters
- /// or point beyond the last character of the string.
- ///
- /// See also `slice_to` and `slice_from` for slicing prefixes and
- /// suffixes of strings, and `slice_chars` for slicing based on
- /// code point counts.
- ///
- /// # Example
- ///
- /// ```rust
- /// let s = "Löwe 老虎 Léopard";
- /// assert_eq!(s.slice(0, 1), "L");
- ///
- /// assert_eq!(s.slice(1, 9), "öwe 老");
- ///
- /// // these will panic:
- /// // byte 2 lies within `ö`:
- /// // s.slice(2, 3);
- ///
- /// // byte 8 lies within `老`
- /// // s.slice(1, 8);
- ///
- /// // byte 100 is outside the string
- /// // s.slice(3, 100);
- /// ```
fn slice<'a>(&'a self, begin: uint, end: uint) -> &'a str;
-
- /// Returns a slice of the string from `begin` to its end.
- ///
- /// Equivalent to `self.slice(begin, self.len())`.
- ///
- /// Panics when `begin` does not point to a valid character, or is
- /// out of bounds.
- ///
- /// See also `slice`, `slice_to` and `slice_chars`.
fn slice_from<'a>(&'a self, begin: uint) -> &'a str;
-
- /// Returns a slice of the string from the beginning to byte
- /// `end`.
- ///
- /// Equivalent to `self.slice(0, end)`.
- ///
- /// Panics when `end` does not point to a valid character, or is
- /// out of bounds.
- ///
- /// See also `slice`, `slice_from` and `slice_chars`.
fn slice_to<'a>(&'a self, end: uint) -> &'a str;
-
- /// Returns a slice of the string from the character range
- /// [`begin`..`end`).
- ///
- /// That is, start at the `begin`-th code point of the string and
- /// continue to the `end`-th code point. This does not detect or
- /// handle edge cases such as leaving a combining character as the
- /// first code point of the string.
- ///
- /// Due to the design of UTF-8, this operation is `O(end)`.
- /// See `slice`, `slice_to` and `slice_from` for `O(1)`
- /// variants that use byte indices rather than code point
- /// indices.
- ///
- /// Panics if `begin` > `end` or the either `begin` or `end` are
- /// beyond the last character of the string.
- ///
- /// # Example
- ///
- /// ```rust
- /// let s = "Löwe 老虎 Léopard";
- /// assert_eq!(s.slice_chars(0, 4), "Löwe");
- /// assert_eq!(s.slice_chars(5, 7), "老虎");
- /// ```
fn slice_chars<'a>(&'a self, begin: uint, end: uint) -> &'a str;
-
- /// Takes a bytewise (not UTF-8) slice from a string.
- ///
- /// Returns the substring from [`begin`..`end`).
- ///
- /// Caller must check both UTF-8 character boundaries and the boundaries of
- /// the entire slice as well.
unsafe fn slice_unchecked<'a>(&'a self, begin: uint, end: uint) -> &'a str;
-
- /// Returns true if `needle` is a prefix of the string.
- ///
- /// # Example
- ///
- /// ```rust
- /// assert!("banana".starts_with("ba"));
- /// ```
fn starts_with(&self, needle: &str) -> bool;
-
- /// Returns true if `needle` is a suffix of the string.
- ///
- /// # Example
- ///
- /// ```rust
- /// assert!("banana".ends_with("nana"));
- /// ```
fn ends_with(&self, needle: &str) -> bool;
-
- /// Returns a string with characters that match `to_trim` removed from the left and the right.
- ///
- /// # Arguments
- ///
- /// * to_trim - a character matcher
- ///
- /// # Example
- ///
- /// ```rust
- /// # #![feature(unboxed_closures)]
- ///
- /// # fn main() {
- /// assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar");
- /// let x: &[_] = &['1', '2'];
- /// assert_eq!("12foo1bar12".trim_chars(x), "foo1bar");
- /// assert_eq!("123foo1bar123".trim_chars(|&: c: char| c.is_numeric()), "foo1bar");
- /// # }
- /// ```
fn trim_chars<'a, C: CharEq>(&'a self, to_trim: C) -> &'a str;
-
- /// Returns a string with leading `chars_to_trim` removed.
- ///
- /// # Arguments
- ///
- /// * to_trim - a character matcher
- ///
- /// # Example
- ///
- /// ```rust
- /// # #![feature(unboxed_closures)]
- ///
- /// # fn main() {
- /// assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11");
- /// let x: &[_] = &['1', '2'];
- /// assert_eq!("12foo1bar12".trim_left_chars(x), "foo1bar12");
- /// assert_eq!("123foo1bar123".trim_left_chars(|&: c: char| c.is_numeric()), "foo1bar123");
- /// # }
- /// ```
fn trim_left_chars<'a, C: CharEq>(&'a self, to_trim: C) -> &'a str;
-
- /// Returns a string with trailing `chars_to_trim` removed.
- ///
- /// # Arguments
- ///
- /// * to_trim - a character matcher
- ///
- /// # Example
- ///
- /// ```rust
- /// # #![feature(unboxed_closures)]
- ///
- /// # fn main() {
- /// assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar");
- /// let x: &[_] = &['1', '2'];
- /// assert_eq!("12foo1bar12".trim_right_chars(x), "12foo1bar");
- /// assert_eq!("123foo1bar123".trim_right_chars(|&: c: char| c.is_numeric()), "123foo1bar");
- /// # }
- /// ```
fn trim_right_chars<'a, C: CharEq>(&'a self, to_trim: C) -> &'a str;
-
- /// Check that `index`-th byte lies at the start and/or end of a
- /// UTF-8 code point sequence.
- ///
- /// The start and end of the string (when `index == self.len()`)
- /// are considered to be boundaries.
- ///
- /// Panics if `index` is greater than `self.len()`.
- ///
- /// # Example
- ///
- /// ```rust
- /// let s = "Löwe 老虎 Léopard";
- /// assert!(s.is_char_boundary(0));
- /// // start of `老`
- /// assert!(s.is_char_boundary(6));
- /// assert!(s.is_char_boundary(s.len()));
- ///
- /// // second byte of `ö`
- /// assert!(!s.is_char_boundary(2));
- ///
- /// // third byte of `老`
- /// assert!(!s.is_char_boundary(8));
- /// ```
fn is_char_boundary(&self, index: uint) -> bool;
-
- /// Pluck a character out of a string and return the index of the next
- /// character.
- ///
- /// This function can be used to iterate over the Unicode characters of a
- /// string.
- ///
- /// # Example
- ///
- /// This example manually iterates through the characters of a
- /// string; this should normally be done by `.chars()` or
- /// `.char_indices`.
- ///
- /// ```rust
- /// use std::str::CharRange;
- ///
- /// let s = "中华Việt Nam";
- /// let mut i = 0u;
- /// while i < s.len() {
- /// let CharRange {ch, next} = s.char_range_at(i);
- /// println!("{}: {}", i, ch);
- /// i = next;
- /// }
- /// ```
- ///
- /// This outputs:
- ///
- /// ```text
- /// 0: 中
- /// 3: 华
- /// 6: V
- /// 7: i
- /// 8: ệ
- /// 11: t
- /// 12:
- /// 13: N
- /// 14: a
- /// 15: m
- /// ```
- ///
- /// # Arguments
- ///
- /// * s - The string
- /// * i - The byte offset of the char to extract
- ///
- /// # Return value
- ///
- /// A record {ch: char, next: uint} containing the char value and the byte
- /// index of the next Unicode character.
- ///
- /// # Panics
- ///
- /// If `i` is greater than or equal to the length of the string.
- /// If `i` is not the index of the beginning of a valid UTF-8 character.
fn char_range_at(&self, start: uint) -> CharRange;
-
- /// Given a byte position and a str, return the previous char and its position.
- ///
- /// This function can be used to iterate over a Unicode string in reverse.
- ///
- /// Returns 0 for next index if called on start index 0.
- ///
- /// # Panics
- ///
- /// If `i` is greater than the length of the string.
- /// If `i` is not an index following a valid UTF-8 character.
fn char_range_at_reverse(&self, start: uint) -> CharRange;
-
- /// Plucks the character starting at the `i`th byte of a string.
- ///
- /// # Example
- ///
- /// ```rust
- /// let s = "abπc";
- /// assert_eq!(s.char_at(1), 'b');
- /// assert_eq!(s.char_at(2), 'π');
- /// assert_eq!(s.char_at(4), 'c');
- /// ```
- ///
- /// # Panics
- ///
- /// If `i` is greater than or equal to the length of the string.
- /// If `i` is not the index of the beginning of a valid UTF-8 character.
fn char_at(&self, i: uint) -> char;
-
- /// Plucks the character ending at the `i`th byte of a string.
- ///
- /// # Panics
- ///
- /// If `i` is greater than the length of the string.
- /// If `i` is not an index following a valid UTF-8 character.
fn char_at_reverse(&self, i: uint) -> char;
-
- /// Work with the byte buffer of a string as a byte slice.
- ///
- /// # Example
- ///
- /// ```rust
- /// assert_eq!("bors".as_bytes(), b"bors");
- /// ```
fn as_bytes<'a>(&'a self) -> &'a [u8];
-
- /// Returns the byte index of the first character of `self` that
- /// matches `search`.
- ///
- /// # Return value
- ///
- /// `Some` containing the byte index of the last matching character
- /// or `None` if there is no match
- ///
- /// # Example
- ///
- /// ```rust
- /// # #![feature(unboxed_closures)]
- ///
- /// # fn main() {
- /// let s = "Löwe 老虎 Léopard";
- ///
- /// assert_eq!(s.find('L'), Some(0));
- /// assert_eq!(s.find('é'), Some(14));
- ///
- /// // the first space
- /// assert_eq!(s.find(|&: c: char| c.is_whitespace()), Some(5));
- ///
- /// // neither are found
- /// let x: &[_] = &['1', '2'];
- /// assert_eq!(s.find(x), None);
- /// # }
- /// ```
fn find<C: CharEq>(&self, search: C) -> Option<uint>;
-
- /// Returns the byte index of the last character of `self` that
- /// matches `search`.
- ///
- /// # Return value
- ///
- /// `Some` containing the byte index of the last matching character
- /// or `None` if there is no match.
- ///
- /// # Example
- ///
- /// ```rust
- /// # #![feature(unboxed_closures)]
- ///
- /// # fn main() {
- /// let s = "Löwe 老虎 Léopard";
- ///
- /// assert_eq!(s.rfind('L'), Some(13));
- /// assert_eq!(s.rfind('é'), Some(14));
- ///
- /// // the second space
- /// assert_eq!(s.rfind(|&: c: char| c.is_whitespace()), Some(12));
- ///
- /// // searches for an occurrence of either `1` or `2`, but neither are found
- /// let x: &[_] = &['1', '2'];
- /// assert_eq!(s.rfind(x), None);
- /// # }
- /// ```
fn rfind<C: CharEq>(&self, search: C) -> Option<uint>;
-
- /// Returns the byte index of the first matching substring
- ///
- /// # Arguments
- ///
- /// * `needle` - The string to search for
- ///
- /// # Return value
- ///
- /// `Some` containing the byte index of the first matching substring
- /// or `None` if there is no match.
- ///
- /// # Example
- ///
- /// ```rust
- /// let s = "Löwe 老虎 Léopard";
- ///
- /// assert_eq!(s.find_str("老虎 L"), Some(6));
- /// assert_eq!(s.find_str("muffin man"), None);
- /// ```
fn find_str(&self, &str) -> Option<uint>;
-
- /// Retrieves the first character from a string slice and returns
- /// it. This does not allocate a new string; instead, it returns a
- /// slice that point one character beyond the character that was
- /// shifted. If the string does not contain any characters,
- /// None is returned instead.
- ///
- /// # Example
- ///
- /// ```rust
- /// let s = "Löwe 老虎 Léopard";
- /// let (c, s1) = s.slice_shift_char().unwrap();
- /// assert_eq!(c, 'L');
- /// assert_eq!(s1, "öwe 老虎 Léopard");
- ///
- /// let (c, s2) = s1.slice_shift_char().unwrap();
- /// assert_eq!(c, 'ö');
- /// assert_eq!(s2, "we 老虎 Léopard");
- /// ```
fn slice_shift_char<'a>(&'a self) -> Option<(char, &'a str)>;
-
- /// Returns the byte offset of an inner slice relative to an enclosing outer slice.
- ///
- /// Panics if `inner` is not a direct slice contained within self.
- ///
- /// # Example
- ///
- /// ```rust
- /// let string = "a\nb\nc";
- /// let lines: Vec<&str> = string.lines().collect();
- ///
- /// assert!(string.subslice_offset(lines[0]) == 0); // &"a"
- /// assert!(string.subslice_offset(lines[1]) == 2); // &"b"
- /// assert!(string.subslice_offset(lines[2]) == 4); // &"c"
- /// ```
fn subslice_offset(&self, inner: &str) -> uint;
-
- /// Return an unsafe pointer to the strings buffer.
- ///
- /// The caller must ensure that the string outlives this pointer,
- /// and that it is not reallocated (e.g. by pushing to the
- /// string).
fn as_ptr(&self) -> *const u8;
-
- /// Return an iterator of `u16` over the string encoded as UTF-16.
- fn utf16_units<'a>(&'a self) -> Utf16CodeUnits<'a>;
-
- /// Return the number of bytes in this string
- ///
- /// # Example
- ///
- /// ```
- /// assert_eq!("foo".len(), 3);
- /// assert_eq!("ƒoo".len(), 4);
- /// ```
- #[experimental = "not triaged yet"]
fn len(&self) -> uint;
-
- /// Returns true if this slice contains no bytes
- ///
- /// # Example
- ///
- /// ```
- /// assert!("".is_empty());
- /// ```
- #[inline]
- #[experimental = "not triaged yet"]
- fn is_empty(&self) -> bool { self.len() == 0 }
+ fn is_empty(&self) -> bool;
}
#[inline(never)]
begin, end, s);
}
-impl StrPrelude for str {
+impl StrExt for str {
#[inline]
fn contains(&self, needle: &str) -> bool {
self.find_str(needle).is_some()
fn bytes(&self) -> Bytes {
fn deref(&x: &u8) -> u8 { x }
- self.as_bytes().iter().map(BytesFn(deref))
+ Bytes { inner: self.as_bytes().iter().map(BytesFn(deref)) }
}
#[inline]
- fn char_indices(&self) -> CharOffsets {
- CharOffsets{front_offset: 0, iter: self.chars()}
+ fn char_indices(&self) -> CharIndices {
+ CharIndices { front_offset: 0, iter: self.chars() }
}
#[inline]
}
#[inline]
- fn lines(&self) -> CharSplits<char> {
- self.split_terminator('\n')
+ fn lines(&self) -> Lines {
+ Lines { inner: self.split_terminator('\n') }
}
- fn lines_any(&self) -> AnyLines {
+ fn lines_any(&self) -> LinesAny {
fn f(line: &str) -> &str {
let l = line.len();
if l > 0 && line.as_bytes()[l - 1] == b'\r' { line.slice(0, l - 1) }
else { line }
}
- self.lines().map(f)
+ LinesAny { inner: self.lines().map(f) }
}
#[inline]
}
#[inline]
- fn utf16_units(&self) -> Utf16CodeUnits {
- Utf16CodeUnits { encoder: Utf16Encoder::new(self.chars()) }
- }
+ fn len(&self) -> uint { self.repr().len }
#[inline]
- fn len(&self) -> uint { self.repr().len }
+ fn is_empty(&self) -> bool { self.len() == 0 }
}
#[stable]
fn default() -> &'a str { "" }
}
+
+// `Lines`, `LinesAny` and `Bytes` are wrapper structs around an `inner`
+// iterator; every impl below simply forwards to that wrapped iterator.
+
+// Front-to-back iteration over `Lines`.
+impl<'a> Iterator<&'a str> for Lines<'a> {
+ #[inline]
+ fn next(&mut self) -> Option<&'a str> { self.inner.next() }
+}
+// Back-to-front iteration over `Lines`.
+impl<'a> DoubleEndedIterator<&'a str> for Lines<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<&'a str> { self.inner.next_back() }
+}
+// Front-to-back iteration over `LinesAny`.
+impl<'a> Iterator<&'a str> for LinesAny<'a> {
+ #[inline]
+ fn next(&mut self) -> Option<&'a str> { self.inner.next() }
+}
+// Back-to-front iteration over `LinesAny`.
+impl<'a> DoubleEndedIterator<&'a str> for LinesAny<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<&'a str> { self.inner.next_back() }
+}
+// Front-to-back iteration over the bytes of a string.
+impl<'a> Iterator<u8> for Bytes<'a> {
+ #[inline]
+ fn next(&mut self) -> Option<u8> { self.inner.next() }
+}
+// Back-to-front iteration over the bytes of a string.
+impl<'a> DoubleEndedIterator<u8> for Bytes<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<u8> { self.inner.next_back() }
+}
+// Marker impl: no methods are overridden here.
+impl<'a> ExactSizeIterator<u8> for Bytes<'a> {}
pub mod ppaux;
pub mod nodemap;
pub mod snapshot_vec;
+ pub mod lev_distance;
}
pub mod lib {
--- /dev/null
+// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use std::cmp;
+
+/// Computes the Levenshtein (edit) distance between `me` and `t`.
+///
+/// The distance is counted in `char`s, not bytes: it is the minimum number of
+/// single-character insertions, deletions and substitutions needed to turn
+/// `me` into `t`. If either string is empty the result is the number of
+/// characters in the other one.
+///
+/// Only a single column of the dynamic-programming table is kept, with one
+/// slot per character of `t` plus one for the empty prefix.
+pub fn lev_distance(me: &str, t: &str) -> uint {
+    if me.is_empty() { return t.chars().count(); }
+
+    // Size the column by characters, not bytes: the loops below index it by
+    // character position, so `t.len()` would over-allocate on multi-byte text.
+    let t_chars = t.chars().count();
+    if t_chars == 0 { return me.chars().count(); }
+
+    // dcol[j] starts as the cost of building the first `j` chars of `t`
+    // from the empty string.
+    let mut dcol = Vec::from_fn(t_chars + 1, |x| x);
+
+    for (i, sc) in me.chars().enumerate() {
+        // `current` carries the diagonal (previous row, previous column) value.
+        let mut current = i;
+        dcol[0] = current + 1;
+
+        for (j, tc) in t.chars().enumerate() {
+            // Value from the previous row in this column, before overwriting it.
+            let next = dcol[j + 1];
+
+            if sc == tc {
+                dcol[j + 1] = current;
+            } else {
+                // Cheapest of substitution, deletion and insertion, plus one edit.
+                dcol[j + 1] = cmp::min(current, next);
+                dcol[j + 1] = cmp::min(dcol[j + 1], dcol[j]) + 1;
+            }
+
+            current = next;
+        }
+    }
+
+    dcol[t_chars]
+}
+
+// Exercises `lev_distance` on every single-character string and on a few
+// sentences containing multi-byte characters.
+#[test]
+fn test_lev_distance() {
+ use std::char::{ from_u32, MAX };
+ // Test bytelength agnosticity
+ // Every value below `MAX` that `from_u32` accepts is turned into a
+ // one-character string, which must be at distance 0 from itself.
+ for c in range(0u32, MAX as u32)
+ .filter_map(|i| from_u32(i))
+ .map(|i| String::from_char(1, i)) {
+ assert_eq!(lev_distance(c[], c[]), 0);
+ }
+
+ // `b` is `a` with the first `ä` replaced by `a`; `c` is `b` without the
+ // leading newline. Each assertion is checked in both argument orders.
+ let a = "\nMäry häd ä little lämb\n\nLittle lämb\n";
+ let b = "\nMary häd ä little lämb\n\nLittle lämb\n";
+ let c = "Mary häd ä little lämb\n\nLittle lämb\n";
+ assert_eq!(lev_distance(a, b), 1);
+ assert_eq!(lev_distance(b, a), 1);
+ assert_eq!(lev_distance(a, c), 2);
+ assert_eq!(lev_distance(c, a), 2);
+ assert_eq!(lev_distance(b, c), 1);
+ assert_eq!(lev_distance(c, b), 1);
+}
use rustc::middle::subst::{ParamSpace, FnSpace, TypeSpace};
use rustc::middle::ty::{CaptureModeMap, Freevar, FreevarMap, TraitMap};
use rustc::util::nodemap::{NodeMap, NodeSet, DefIdSet, FnvHashMap};
+use rustc::util::lev_distance::lev_distance;
use syntax::ast::{Arm, BindByRef, BindByValue, BindingMode, Block, Crate, CrateNum};
use syntax::ast::{DeclItem, DefId, Expr, ExprAgain, ExprBreak, ExprField};
use std::rc::{Rc, Weak};
use std::uint;
-mod check_unused;
-mod record_exports;
+// Definition mapping
+pub type DefMap = RefCell<NodeMap<Def>>;
#[deriving(Copy)]
struct BindingInfo {
let mut smallest = 0;
for (i, other) in maybes.iter().enumerate() {
- values[i] = name.lev_distance(other.get());
+ values[i] = lev_distance(name, other.get());
if values[i] <= values[smallest] {
smallest = i;
//! }
//! ```
-use option::Option;
-use option::Option::None;
-use kinds::Send;
-use string::String;
+use prelude::*;
+
+use str::Utf8Error;
/// Base functionality for all errors in Rust.
pub trait Error: Send {
err
}
}
+
+// Lets a `Utf8Error` be reported through the generic `Error` trait.
+impl Error for Utf8Error {
+ // Fixed, variant-specific summary; any payload carried by `InvalidByte`
+ // is ignored here.
+ fn description(&self) -> &str {
+ match *self {
+ Utf8Error::TooShort => "invalid utf-8: not enough bytes",
+ Utf8Error::InvalidByte(..) => "invalid utf-8: corrupt contents",
+ }
+ }
+
+ // The detail is always present and is the error's `to_string()` rendering.
+ fn detail(&self) -> Option<String> { Some(self.to_string()) }
+}
// Push it onto the list.
let ptr = ptr as *const u16;
let buf = slice::from_raw_buf(&ptr, len);
- let opt_s = String::from_utf16(::str::truncate_utf16_at_nul(buf));
+ let opt_s = String::from_utf16(os_imp::truncate_utf16_at_nul(buf));
opt_s.expect("CommandLineToArgvW returned invalid UTF-16")
});
const BUF_BYTES : uint = 2048u;
+/// Returns the prefix of `v` that precedes its first NUL (0) code unit.
+///
+/// The NUL itself is not part of the result. When `v` holds no NUL at all,
+/// the whole slice is returned.
+pub fn truncate_utf16_at_nul<'a>(v: &'a [u16]) -> &'a [u16] {
+    // Cut at the first 0, or at the end of the slice when there is none.
+    let end = v.iter().position(|unit| *unit == 0).unwrap_or(v.len());
+    v[..end]
+}
+
pub fn errno() -> uint {
use libc::types::os::arch::extra::DWORD;
return format!("OS Error {} (FormatMessageW() returned error {})", errnum, fm_err);
}
- let msg = String::from_utf16(::str::truncate_utf16_at_nul(&buf));
+ let msg = String::from_utf16(truncate_utf16_at_nul(&buf));
match msg {
Some(msg) => format!("OS Error {}: {}", errnum, msg),
None => format!("OS Error {} (FormatMessageW() returned invalid UTF-16)", errnum),
return info.dwPageSize as uint;
}
}
+
+#[cfg(test)]
+mod tests {
+ use super::truncate_utf16_at_nul;
+
+ // Covers the empty input and a NUL at the start, middle, end, and absent.
+ #[test]
+ fn test_truncate_utf16_at_nul() {
+ // Empty input stays empty.
+ let v = [];
+ let b: &[u16] = &[];
+ assert_eq!(truncate_utf16_at_nul(&v), b);
+
+ // NUL in first position: nothing precedes it.
+ let v = [0, 2, 3];
+ assert_eq!(truncate_utf16_at_nul(&v), b);
+
+ // NUL in the middle: only the units before it are kept.
+ let v = [1, 0, 3];
+ let b: &[u16] = &[1];
+ assert_eq!(truncate_utf16_at_nul(&v), b);
+
+ // NUL in last position: everything but the NUL is kept.
+ let v = [1, 2, 0];
+ let b: &[u16] = &[1, 2];
+ assert_eq!(truncate_utf16_at_nul(&v), b);
+
+ // No NUL: the slice is returned unchanged.
+ let v = [1, 2, 3];
+ let b: &[u16] = &[1, 2, 3];
+ assert_eq!(truncate_utf16_at_nul(&v), b);
+ }
+}
html_root_url = "http://doc.rust-lang.org/nightly/",
html_playground_url = "http://play.rust-lang.org/")]
#![no_std]
-#![feature(globs)]
-#![feature(unboxed_closures)]
+#![feature(globs, macro_rules, slicing_syntax, unboxed_closures)]
extern crate core;
}
pub mod str {
- pub use u_str::{UnicodeStrPrelude, Words, Graphemes, GraphemeIndices};
+ pub use u_str::{UnicodeStr, Words, Graphemes, GraphemeIndices};
+ pub use u_str::{utf8_char_width, is_utf16, Utf16Items, Utf16Item};
+ pub use u_str::{utf16_items, Utf16Encoder};
}
-// this lets us use #[deriving(Clone)]
+// this lets us use #[deriving(..)]
mod std {
pub use core::clone;
pub use core::cmp;
+ pub use core::fmt;
}
//! This module provides functionality to `str` that requires the Unicode methods provided by the
//! UnicodeChar trait.
-use self::GraphemeState::*;
+use core::prelude::*;
+
+use core::char;
use core::cmp;
-use core::slice::SliceExt;
-use core::iter::{Filter, AdditiveIterator, Iterator, IteratorExt};
use core::iter::{DoubleEndedIterator, DoubleEndedIteratorExt};
+use core::iter::{Filter, AdditiveIterator, Iterator, IteratorExt};
+use core::iter::{Filter, AdditiveIterator};
use core::kinds::Sized;
-use core::option::Option;
+use core::mem;
+use core::num::Int;
use core::option::Option::{None, Some};
+use core::option::Option;
+use core::slice::SliceExt;
+use core::slice;
use core::str::{CharSplits, StrPrelude};
+use core::str::{CharSplits};
+
use u_char::UnicodeChar;
use tables::grapheme::GraphemeCat;
/// An iterator over the words of a string, separated by a sequence of whitespace
/// FIXME: This should be opaque
-pub type Words<'a> = Filter<&'a str, CharSplits<'a, fn(char) -> bool>, fn(&&str) -> bool>;
+#[stable]
+pub struct Words<'a> {
+ // Private wrapped iterator; `words()` builds it by splitting on whitespace
+ // and filtering out the empty pieces.
+ inner: Filter<'a, &'a str, CharSplits<'a, |char|:'a -> bool>,
+ fn(&&str) -> bool>,
+}
/// Methods for Unicode string slices
-pub trait UnicodeStrPrelude for Sized? {
+pub trait UnicodeStr for Sized? {
/// Returns an iterator over the
/// [grapheme clusters](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
/// of the string.
/// let v: Vec<&str> = some_words.words().collect();
/// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
/// ```
+ #[stable]
fn words<'a>(&'a self) -> Words<'a>;
/// Returns true if the string contains only whitespace.
fn trim_right<'a>(&'a self) -> &'a str;
}
-impl UnicodeStrPrelude for str {
+impl UnicodeStr for str {
#[inline]
fn graphemes(&self, is_extended: bool) -> Graphemes {
Graphemes { string: self, extended: is_extended, cat: None, catb: None }
fn is_not_empty(s: &&str) -> bool { !s.is_empty() }
fn is_whitespace(c: char) -> bool { c.is_whitespace() }
- self.split(is_whitespace).filter(is_not_empty)
+ Words { inner: self.split(is_whitespace).filter(is_not_empty) }
}
#[inline]
Some(retstr)
}
}
+
+// https://tools.ietf.org/html/rfc3629
+//
+// Lookup table indexed by the first byte of a UTF-8 sequence; each entry is
+// the total number of bytes in that sequence (1 to 4). An entry of 0 marks a
+// byte that has no width assigned here: 0x80-0xBF, 0xC0, 0xC1 and 0xF5-0xFF.
+// The trailing comment on a row names the index of that row's last entry.
+static UTF8_CHAR_WIDTH: [u8, ..256] = [
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
+0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
+4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
+];
+
+/// Looks up how many bytes the UTF-8 sequence starting with byte `b` occupies.
+///
+/// The answer comes straight from the `UTF8_CHAR_WIDTH` table, so it is 0 for
+/// any byte the table assigns no width to.
+#[inline]
+pub fn utf8_char_width(b: u8) -> uint {
+    UTF8_CHAR_WIDTH[b as uint] as uint
+}
+
+/// Determines if a vector of `u16` contains valid UTF-16
+///
+/// Returns `true` when every code unit is either a scalar value on its own or
+/// a leading surrogate (0xD800-0xDBFF) immediately followed by a trailing
+/// surrogate (0xDC00-0xDFFF). An empty slice is valid.
+pub fn is_utf16(v: &[u16]) -> bool {
+    let mut it = v.iter();
+    // Fetch the next unit, or return `$ret` from `is_utf16` at end of input.
+    macro_rules! next ( ($ret:expr) => {
+            match it.next() { Some(u) => *u, None => return $ret }
+        }
+    )
+    loop {
+        // Running out of input between characters means everything was valid.
+        let u = next!(true);
+
+        match char::from_u32(u as u32) {
+            Some(_) => {}
+            None => {
+                // `u` is not a scalar value by itself, so it must open a
+                // surrogate pair; running out of input here is an error.
+                let u2 = next!(false);
+                // Leading surrogates start at 0xD800 (0xD7FF is an ordinary
+                // scalar value), matching the bounds used by `Utf16Items`.
+                if u < 0xD800 || u > 0xDBFF ||
+                   u2 < 0xDC00 || u2 > 0xDFFF { return false; }
+            }
+        }
+    }
+}
+
+/// An iterator that decodes UTF-16 encoded codepoints from a vector
+/// of `u16`s.
+#[deriving(Clone)]
+pub struct Utf16Items<'a> {
+ // Cursor over the code units that have not been decoded yet.
+ iter: slice::Items<'a, u16>
+}
+/// The possibilities for values decoded from a `u16` stream.
+#[deriving(PartialEq, Eq, Clone, Show)]
+pub enum Utf16Item {
+ /// A valid codepoint.
+ ScalarValue(char),
+ /// An invalid surrogate without its pair.
+ LoneSurrogate(u16)
+}
+
+// Both variants hold plain `char`/`u16` payloads, so items are `Copy`.
+impl Copy for Utf16Item {}
+
+impl Utf16Item {
+ /// Convert `self` to a `char`, taking `LoneSurrogate`s to the
+ /// replacement character (U+FFFD).
+ ///
+ /// A `ScalarValue` is returned unchanged.
+ #[inline]
+ pub fn to_char_lossy(&self) -> char {
+ match *self {
+ Utf16Item::ScalarValue(c) => c,
+ Utf16Item::LoneSurrogate(_) => '\uFFFD'
+ }
+ }
+}
+
+// Decodes one item per call: a unit outside the surrogate range becomes a
+// `ScalarValue`, a well-formed surrogate pair becomes one `ScalarValue`, and
+// any surrogate that is not part of a well-formed pair is reported as a
+// `LoneSurrogate`.
+impl<'a> Iterator<Utf16Item> for Utf16Items<'a> {
+ fn next(&mut self) -> Option<Utf16Item> {
+ let u = match self.iter.next() {
+ Some(u) => *u,
+ None => return None
+ };
+
+ if u < 0xD800 || 0xDFFF < u {
+ // not a surrogate
+ // (the transmute relies on `u` being a `u16` outside
+ // 0xD800-0xDFFF, i.e. a value that is a scalar value as-is)
+ Some(Utf16Item::ScalarValue(unsafe {mem::transmute(u as u32)}))
+ } else if u >= 0xDC00 {
+ // a trailing surrogate
+ // (seen without a leading surrogate before it)
+ Some(Utf16Item::LoneSurrogate(u))
+ } else {
+ // preserve state for rewinding.
+ let old = self.iter;
+
+ let u2 = match self.iter.next() {
+ Some(u2) => *u2,
+ // eof
+ None => return Some(Utf16Item::LoneSurrogate(u))
+ };
+ if u2 < 0xDC00 || u2 > 0xDFFF {
+ // not a trailing surrogate so we're not a valid
+ // surrogate pair, so rewind to redecode u2 next time.
+ self.iter = old;
+ return Some(Utf16Item::LoneSurrogate(u))
+ }
+
+ // all ok, so lets decode it.
+ // (10 bits from each half, offset by 0x1_0000; the result lies in
+ // 0x1_0000-0x10_FFFF, which is what the transmute relies on)
+ let c = ((u - 0xD800) as u32 << 10 | (u2 - 0xDC00) as u32) + 0x1_0000;
+ Some(Utf16Item::ScalarValue(unsafe {mem::transmute(c)}))
+ }
+ }
+
+ #[inline]
+ fn size_hint(&self) -> (uint, Option<uint>) {
+ let (low, high) = self.iter.size_hint();
+ // we could be entirely valid surrogates (2 elements per
+ // char), or entirely non-surrogates (1 element per char)
+ (low / 2, high)
+ }
+}
+
+/// Create an iterator over the UTF-16 encoded codepoints in `v`,
+/// returning invalid surrogates as `LoneSurrogate`s.
+///
+/// # Example
+///
+/// ```rust
+/// extern crate unicode;
+///
+/// use unicode::str::Utf16Item::{ScalarValue, LoneSurrogate};
+///
+/// fn main() {
+/// // 𝄞mus<invalid>ic<invalid>
+/// let v = [0xD834, 0xDD1E, 0x006d, 0x0075,
+/// 0x0073, 0xDD1E, 0x0069, 0x0063,
+/// 0xD834];
+///
+/// assert_eq!(unicode::str::utf16_items(&v).collect::<Vec<_>>(),
+/// vec![ScalarValue('𝄞'),
+/// ScalarValue('m'), ScalarValue('u'), ScalarValue('s'),
+/// LoneSurrogate(0xDD1E),
+/// ScalarValue('i'), ScalarValue('c'),
+/// LoneSurrogate(0xD834)]);
+/// }
+/// ```
+pub fn utf16_items<'a>(v: &'a [u16]) -> Utf16Items<'a> {
+ Utf16Items { iter : v.iter() }
+}
+
+/// Iterator adaptor for encoding `char`s to UTF-16.
+#[deriving(Clone)]
+pub struct Utf16Encoder<I> {
+ // Source of the characters still to be encoded.
+ chars: I,
+ // Second code unit of a two-unit character, held back until the next
+ // call to `next()`; 0 means nothing is pending.
+ extra: u16
+}
+
+impl<I> Utf16Encoder<I> {
+ /// Create an UTF-16 encoder from any `char` iterator.
+ ///
+ /// The encoder starts with no pending code unit (`extra` is 0).
+ pub fn new(chars: I) -> Utf16Encoder<I> where I: Iterator<char> {
+ Utf16Encoder { chars: chars, extra: 0 }
+ }
+}
+
+// Yields the UTF-16 code units of the wrapped `char` iterator, one `u16` per
+// call. A character that needs two units has its second unit parked in
+// `extra` and emitted by the following call.
+impl<I> Iterator<u16> for Utf16Encoder<I> where I: Iterator<char> {
+    #[inline]
+    fn next(&mut self) -> Option<u16> {
+        // A unit parked by the previous call goes out before any new char is
+        // pulled from the source.
+        if self.extra != 0 {
+            let tmp = self.extra;
+            self.extra = 0;
+            return Some(tmp);
+        }
+
+        let mut buf = [0u16, ..2];
+        self.chars.next().map(|ch| {
+            // `n` is treated as the number of units written into `buf`.
+            let n = ch.encode_utf16(buf[mut]).unwrap_or(0);
+            if n == 2 { self.extra = buf[1]; }
+            buf[0]
+        })
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (uint, Option<uint>) {
+        let (low, high) = self.chars.size_hint();
+        // A parked second unit is one more item that will certainly be
+        // yielded, so it counts towards both bounds.
+        let pending = if self.extra != 0 { 1 } else { 0 };
+        // every char gets either one u16 or two u16,
+        // so this iterator is between 1 or 2 times as
+        // long as the underlying iterator.
+        (low.saturating_add(pending),
+         high.and_then(|n| n.checked_mul(2))
+             .and_then(|n| n.checked_add(pending)))
+    }
+}
+
+// `Words` only wraps an `inner` iterator; both directions forward to it.
+impl<'a> Iterator<&'a str> for Words<'a> {
+ fn next(&mut self) -> Option<&'a str> { self.inner.next() }
+}
+impl<'a> DoubleEndedIterator<&'a str> for Words<'a> {
+ fn next_back(&mut self) -> Option<&'a str> { self.inner.next_back() }
+}