1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 // ignore-lexer-test FIXME #15679
13 //! String manipulation
15 //! For more details, see std::str
17 #![doc(primitive = "str")]
19 use self::OldSearcher::{TwoWay, TwoWayLong};
27 use iter::ExactSizeIterator;
28 use iter::{Map, Iterator, IteratorExt, DoubleEndedIterator};
33 use option::Option::{self, None, Some};
34 use raw::{Repr, Slice};
35 use result::Result::{self, Ok, Err};
36 use slice::{self, SliceExt};
39 pub use self::pattern::Pattern;
40 pub use self::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher, SearchStep};
44 macro_rules! delegate_iter {
45 (exact $te:ty : $ti:ty) => {
46 delegate_iter!{$te : $ti}
47 impl<'a> ExactSizeIterator for $ti {
49 fn len(&self) -> usize {
54 ($te:ty : $ti:ty) => {
55 #[stable(feature = "rust1", since = "1.0.0")]
56 impl<'a> Iterator for $ti {
60 fn next(&mut self) -> Option<$te> {
64 fn size_hint(&self) -> (usize, Option<usize>) {
68 #[stable(feature = "rust1", since = "1.0.0")]
69 impl<'a> DoubleEndedIterator for $ti {
71 fn next_back(&mut self) -> Option<$te> {
76 (pattern $te:ty : $ti:ty) => {
77 #[stable(feature = "rust1", since = "1.0.0")]
78 impl<'a, P: Pattern<'a>> Iterator for $ti {
82 fn next(&mut self) -> Option<$te> {
86 fn size_hint(&self) -> (usize, Option<usize>) {
90 #[stable(feature = "rust1", since = "1.0.0")]
91 impl<'a, P: Pattern<'a>> DoubleEndedIterator for $ti
92 where P::Searcher: DoubleEndedSearcher<'a> {
94 fn next_back(&mut self) -> Option<$te> {
99 (pattern forward $te:ty : $ti:ty) => {
100 #[stable(feature = "rust1", since = "1.0.0")]
101 impl<'a, P: Pattern<'a>> Iterator for $ti
102 where P::Searcher: DoubleEndedSearcher<'a> {
106 fn next(&mut self) -> Option<$te> {
110 fn size_hint(&self) -> (usize, Option<usize>) {
115 (pattern reverse $te:ty : $ti:ty) => {
116 #[stable(feature = "rust1", since = "1.0.0")]
117 impl<'a, P: Pattern<'a>> Iterator for $ti
118 where P::Searcher: ReverseSearcher<'a>
123 fn next(&mut self) -> Option<$te> {
127 fn size_hint(&self) -> (usize, Option<usize>) {
134 /// A trait to abstract the idea of creating a new instance of a type from a
136 #[stable(feature = "rust1", since = "1.0.0")]
138 /// The associated error which can be returned from parsing.
139 #[stable(feature = "rust1", since = "1.0.0")]
142 /// Parses a string `s` to return an optional value of this type. If the
143 /// string is ill-formatted, the None is returned.
144 #[stable(feature = "rust1", since = "1.0.0")]
145 fn from_str(s: &str) -> Result<Self, Self::Err>;
148 #[stable(feature = "rust1", since = "1.0.0")]
149 impl FromStr for bool {
150 type Err = ParseBoolError;
152 /// Parse a `bool` from a string.
154 /// Yields a `Result<bool, ParseBoolError>`, because `s` may or may not
155 /// actually be parseable.
160 /// use std::str::FromStr;
162 /// assert_eq!(FromStr::from_str("true"), Ok(true));
163 /// assert_eq!(FromStr::from_str("false"), Ok(false));
164 /// assert!(<bool as FromStr>::from_str("not even a boolean").is_err());
167 /// Note, in many cases, the StrExt::parse() which is based on
168 /// this FromStr::from_str() is more proper.
171 /// assert_eq!("true".parse(), Ok(true));
172 /// assert_eq!("false".parse(), Ok(false));
173 /// assert!("not even a boolean".parse::<bool>().is_err());
176 fn from_str(s: &str) -> Result<bool, ParseBoolError> {
179 "false" => Ok(false),
180 _ => Err(ParseBoolError { _priv: () }),
185 /// An error returned when parsing a `bool` from a string fails.
186 #[derive(Debug, Clone, PartialEq)]
187 #[stable(feature = "rust1", since = "1.0.0")]
188 pub struct ParseBoolError { _priv: () }
190 #[stable(feature = "rust1", since = "1.0.0")]
191 impl fmt::Display for ParseBoolError {
192 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
193 "provided string was not `true` or `false`".fmt(f)
197 #[stable(feature = "rust1", since = "1.0.0")]
198 impl Error for ParseBoolError {
199 fn description(&self) -> &str { "failed to parse bool" }
203 Section: Creating a string
206 /// Errors which can occur when attempting to interpret a byte slice as a `str`.
207 #[derive(Copy, Eq, PartialEq, Clone, Debug)]
208 #[unstable(feature = "core",
209 reason = "error enumeration recently added and definitions may be refined")]
211 /// An invalid byte was detected at the byte offset given.
213 /// The offset is guaranteed to be in bounds of the slice in question, and
214 /// the byte at the specified offset was the first invalid byte in the
215 /// sequence detected.
218 /// The byte slice was invalid because more bytes were needed but no more
219 /// bytes were available.
223 /// Converts a slice of bytes to a string slice without performing any
226 /// Once the slice has been validated as utf-8, it is transmuted in-place and
227 /// returned as a '&str' instead of a '&[u8]'
231 /// Returns `Err` if the slice is not utf-8 with a description as to why the
232 /// provided slice is not utf-8.
233 #[stable(feature = "rust1", since = "1.0.0")]
234 pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
235 try!(run_utf8_validation_iterator(&mut v.iter()));
236 Ok(unsafe { from_utf8_unchecked(v) })
239 /// Converts a slice of bytes to a string slice without checking
240 /// that the string contains valid UTF-8.
241 #[stable(feature = "rust1", since = "1.0.0")]
242 pub unsafe fn from_utf8_unchecked<'a>(v: &'a [u8]) -> &'a str {
246 /// Constructs a static string slice from a given raw pointer.
248 /// This function will read memory starting at `s` until it finds a 0, and then
249 /// transmute the memory up to that point as a string slice, returning the
250 /// corresponding `&'static str` value.
252 /// This function is unsafe because the caller must ensure the C string itself
253 /// has the static lifetime and that the memory `s` is valid up to and including
254 /// the first null byte.
258 /// This function will panic if the string pointed to by `s` is not valid UTF-8.
259 #[unstable(feature = "core")]
260 #[deprecated(since = "1.0.0",
261 reason = "use std::ffi::c_str_to_bytes + str::from_utf8")]
262 pub unsafe fn from_c_str(s: *const i8) -> &'static str {
263 let s = s as *const u8;
265 while *s.offset(len as isize) != 0 {
268 let v: &'static [u8] = ::mem::transmute(Slice { data: s, len: len });
269 from_utf8(v).ok().expect("from_c_str passed invalid utf-8 data")
272 /// Something that can be used to compare against a character
273 #[unstable(feature = "core")]
274 #[deprecated(since = "1.0.0",
275 reason = "use `Pattern` instead")]
276 // NB: Rather than removing it, make it private and move it into self::pattern
278 /// Determine if the splitter should split at the given character
279 fn matches(&mut self, char) -> bool;
280 /// Indicate if this is only concerned about ASCII characters,
281 /// which can allow for a faster implementation.
282 fn only_ascii(&self) -> bool;
285 #[allow(deprecated) /* for CharEq */ ]
286 impl CharEq for char {
288 fn matches(&mut self, c: char) -> bool { *self == c }
291 fn only_ascii(&self) -> bool { (*self as u32) < 128 }
294 #[allow(deprecated) /* for CharEq */ ]
295 impl<F> CharEq for F where F: FnMut(char) -> bool {
297 fn matches(&mut self, c: char) -> bool { (*self)(c) }
300 fn only_ascii(&self) -> bool { false }
303 #[allow(deprecated) /* for CharEq */ ]
304 impl<'a> CharEq for &'a [char] {
306 #[allow(deprecated) /* for CharEq */ ]
307 fn matches(&mut self, c: char) -> bool {
308 self.iter().any(|&m| { let mut m = m; m.matches(c) })
312 #[allow(deprecated) /* for CharEq */ ]
313 fn only_ascii(&self) -> bool {
314 self.iter().all(|m| m.only_ascii())
318 #[stable(feature = "rust1", since = "1.0.0")]
319 impl Error for Utf8Error {
320 fn description(&self) -> &str {
322 Utf8Error::TooShort => "invalid utf-8: not enough bytes",
323 Utf8Error::InvalidByte(..) => "invalid utf-8: corrupt contents",
328 #[stable(feature = "rust1", since = "1.0.0")]
329 impl fmt::Display for Utf8Error {
330 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
332 Utf8Error::InvalidByte(n) => {
333 write!(f, "invalid utf-8: invalid byte at index {}", n)
335 Utf8Error::TooShort => {
336 write!(f, "invalid utf-8: byte slice too short")
346 /// Iterator for the char (representing *Unicode Scalar Values*) of a string
348 /// Created with the method `.chars()`.
350 #[stable(feature = "rust1", since = "1.0.0")]
351 pub struct Chars<'a> {
352 iter: slice::Iter<'a, u8>
355 /// Return the initial codepoint accumulator for the first byte.
356 /// The first byte is special, only want bottom 5 bits for width 2, 4 bits
357 /// for width 3, and 3 bits for width 4.
359 fn utf8_first_byte(byte: u8, width: u32) -> u32 { (byte & (0x7F >> width)) as u32 }
361 /// Return the value of `ch` updated with continuation byte `byte`.
363 fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 { (ch << 6) | (byte & CONT_MASK) as u32 }
365 /// Checks whether the byte is a UTF-8 continuation byte (i.e. starts with the
368 fn utf8_is_cont_byte(byte: u8) -> bool { (byte & !CONT_MASK) == TAG_CONT_U8 }
371 fn unwrap_or_0(opt: Option<&u8>) -> u8 {
378 /// Reads the next code point out of a byte iterator (assuming a
379 /// UTF-8-like encoding).
380 #[unstable(feature = "core")]
382 pub fn next_code_point(bytes: &mut slice::Iter<u8>) -> Option<u32> {
384 let x = match bytes.next() {
386 Some(&next_byte) if next_byte < 128 => return Some(next_byte as u32),
387 Some(&next_byte) => next_byte,
390 // Multibyte case follows
391 // Decode from a byte combination out of: [[[x y] z] w]
392 // NOTE: Performance is sensitive to the exact formulation here
393 let init = utf8_first_byte(x, 2);
394 let y = unwrap_or_0(bytes.next());
395 let mut ch = utf8_acc_cont_byte(init, y);
398 // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
399 let z = unwrap_or_0(bytes.next());
400 let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
401 ch = init << 12 | y_z;
404 // use only the lower 3 bits of `init`
405 let w = unwrap_or_0(bytes.next());
406 ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
413 /// Reads the last code point out of a byte iterator (assuming a
414 /// UTF-8-like encoding).
415 #[unstable(feature = "core")]
417 pub fn next_code_point_reverse(bytes: &mut slice::Iter<u8>) -> Option<u32> {
419 let w = match bytes.next_back() {
421 Some(&next_byte) if next_byte < 128 => return Some(next_byte as u32),
422 Some(&back_byte) => back_byte,
425 // Multibyte case follows
426 // Decode from a byte combination out of: [x [y [z w]]]
428 let z = unwrap_or_0(bytes.next_back());
429 ch = utf8_first_byte(z, 2);
430 if utf8_is_cont_byte(z) {
431 let y = unwrap_or_0(bytes.next_back());
432 ch = utf8_first_byte(y, 3);
433 if utf8_is_cont_byte(y) {
434 let x = unwrap_or_0(bytes.next_back());
435 ch = utf8_first_byte(x, 4);
436 ch = utf8_acc_cont_byte(ch, y);
438 ch = utf8_acc_cont_byte(ch, z);
440 ch = utf8_acc_cont_byte(ch, w);
445 #[stable(feature = "rust1", since = "1.0.0")]
446 impl<'a> Iterator for Chars<'a> {
450 fn next(&mut self) -> Option<char> {
451 next_code_point(&mut self.iter).map(|ch| {
452 // str invariant says `ch` is a valid Unicode Scalar Value
460 fn size_hint(&self) -> (usize, Option<usize>) {
461 let (len, _) = self.iter.size_hint();
462 // `(len + 3)` can't overflow, because we know that the `slice::Iter`
463 // belongs to a slice in memory which has a maximum length of
464 // `isize::MAX` (that's well below `usize::MAX`).
465 ((len + 3) / 4, Some(len))
469 #[stable(feature = "rust1", since = "1.0.0")]
470 impl<'a> DoubleEndedIterator for Chars<'a> {
472 fn next_back(&mut self) -> Option<char> {
473 next_code_point_reverse(&mut self.iter).map(|ch| {
474 // str invariant says `ch` is a valid Unicode Scalar Value
482 /// External iterator for a string's characters and their byte offsets.
483 /// Use with the `std::iter` module.
485 #[stable(feature = "rust1", since = "1.0.0")]
486 pub struct CharIndices<'a> {
491 #[stable(feature = "rust1", since = "1.0.0")]
492 impl<'a> Iterator for CharIndices<'a> {
493 type Item = (usize, char);
496 fn next(&mut self) -> Option<(usize, char)> {
497 let (pre_len, _) = self.iter.iter.size_hint();
498 match self.iter.next() {
501 let index = self.front_offset;
502 let (len, _) = self.iter.iter.size_hint();
503 self.front_offset += pre_len - len;
510 fn size_hint(&self) -> (usize, Option<usize>) {
511 self.iter.size_hint()
515 #[stable(feature = "rust1", since = "1.0.0")]
516 impl<'a> DoubleEndedIterator for CharIndices<'a> {
518 fn next_back(&mut self) -> Option<(usize, char)> {
519 match self.iter.next_back() {
522 let (len, _) = self.iter.iter.size_hint();
523 let index = self.front_offset + len;
530 /// External iterator for a string's bytes.
531 /// Use with the `std::iter` module.
533 /// Created with `StrExt::bytes`
534 #[stable(feature = "rust1", since = "1.0.0")]
536 pub struct Bytes<'a>(Map<slice::Iter<'a, u8>, BytesDeref>);
537 delegate_iter!{exact u8 : Bytes<'a>}
539 /// A temporary fn new type that ensures that the `Bytes` iterator
541 #[derive(Copy, Clone)]
544 impl<'a> Fn<(&'a u8,)> for BytesDeref {
548 extern "rust-call" fn call(&self, (ptr,): (&'a u8,)) -> u8 {
553 /// An iterator over the substrings of a string, separated by `sep`.
554 struct CharSplits<'a, P: Pattern<'a>> {
555 /// The slice remaining to be iterated
558 matcher: P::Searcher,
559 /// Whether an empty string at the end is allowed
560 allow_trailing_empty: bool,
564 /// An iterator over the substrings of a string, separated by `sep`,
565 /// splitting at most `count` times.
566 struct CharSplitsN<'a, P: Pattern<'a>> {
567 iter: CharSplits<'a, P>,
568 /// The number of splits remaining
573 /// An iterator over the substrings of a string, separated by a
574 /// pattern, in reverse order.
575 struct RCharSplits<'a, P: Pattern<'a>> {
576 /// The slice remaining to be iterated
579 matcher: P::Searcher,
580 /// Whether an empty string at the end of iteration is allowed
581 allow_final_empty: bool,
586 /// An iterator over the lines of a string, separated by `\n`.
587 #[stable(feature = "rust1", since = "1.0.0")]
588 pub struct Lines<'a> {
589 inner: CharSplits<'a, char>,
592 /// An iterator over the lines of a string, separated by either `\n` or (`\r\n`).
593 #[stable(feature = "rust1", since = "1.0.0")]
594 pub struct LinesAny<'a> {
595 inner: Map<Lines<'a>, fn(&str) -> &str>,
598 impl<'a, P: Pattern<'a>> CharSplits<'a, P> {
600 fn get_end(&mut self) -> Option<&'a str> {
601 if !self.finished && (self.allow_trailing_empty || self.end - self.start > 0) {
602 self.finished = true;
604 let string = self.matcher.haystack().slice_unchecked(self.start, self.end);
613 #[stable(feature = "rust1", since = "1.0.0")]
614 impl<'a, P: Pattern<'a>> Iterator for CharSplits<'a, P> {
618 fn next(&mut self) -> Option<&'a str> {
619 if self.finished { return None }
621 let haystack = self.matcher.haystack();
622 match self.matcher.next_match() {
623 Some((a, b)) => unsafe {
624 let elt = haystack.slice_unchecked(self.start, a);
628 None => self.get_end(),
633 #[stable(feature = "rust1", since = "1.0.0")]
634 impl<'a, P: Pattern<'a>> DoubleEndedIterator for CharSplits<'a, P>
635 where P::Searcher: DoubleEndedSearcher<'a> {
637 fn next_back(&mut self) -> Option<&'a str> {
638 if self.finished { return None }
640 if !self.allow_trailing_empty {
641 self.allow_trailing_empty = true;
642 match self.next_back() {
643 Some(elt) if !elt.is_empty() => return Some(elt),
644 _ => if self.finished { return None }
648 let haystack = self.matcher.haystack();
649 match self.matcher.next_match_back() {
650 Some((a, b)) => unsafe {
651 let elt = haystack.slice_unchecked(b, self.end);
656 self.finished = true;
657 Some(haystack.slice_unchecked(self.start, self.end))
663 #[stable(feature = "rust1", since = "1.0.0")]
664 impl<'a, P: Pattern<'a>> Iterator for CharSplitsN<'a, P>
665 where P::Searcher: DoubleEndedSearcher<'a> {
669 fn next(&mut self) -> Option<&'a str> {
672 if self.invert { self.iter.next_back() } else { self.iter.next() }
679 impl<'a, P: Pattern<'a>> RCharSplits<'a, P> {
681 fn get_remainder(&mut self) -> Option<&'a str> {
682 if !self.finished && (self.allow_final_empty || self.end - self.start > 0) {
683 self.finished = true;
685 let string = self.matcher.haystack().slice_unchecked(self.start, self.end);
694 #[stable(feature = "rust1", since = "1.0.0")]
695 impl<'a, P: Pattern<'a>> Iterator for RCharSplits<'a, P>
696 where P::Searcher: ReverseSearcher<'a>
701 fn next(&mut self) -> Option<&'a str> {
702 if self.finished { return None }
704 let haystack = self.matcher.haystack();
705 match self.matcher.next_match_back() {
706 Some((a, b)) => unsafe {
707 let elt = haystack.slice_unchecked(b, self.end);
711 None => self.get_remainder(),
716 /// The internal state of an iterator that searches for matches of a substring
717 /// within a larger string using two-way search
719 struct TwoWaySearcher {
731 This is the Two-Way search algorithm, which was introduced in the paper:
732 Crochemore, M., Perrin, D., 1991, Two-way string-matching, Journal of the ACM 38(3):651-675.
734 Here's some background information.
736 A *word* is a string of symbols. The *length* of a word should be a familiar
737 notion, and here we denote it for any word x by |x|.
738 (We also allow for the possibility of the *empty word*, a word of length zero).
740 If x is any non-empty word, then an integer p with 0 < p <= |x| is said to be a
741 *period* for x iff for all i with 0 <= i <= |x| - p - 1, we have x[i] == x[i+p].
742 For example, both 1 and 2 are periods for the string "aa". As another example,
743 the only period of the string "abcd" is 4.
745 We denote by period(x) the *smallest* period of x (provided that x is non-empty).
746 This is always well-defined since every non-empty word x has at least one period,
747 |x|. We sometimes call this *the period* of x.
749 If u, v and x are words such that x = uv, where uv is the concatenation of u and
750 v, then we say that (u, v) is a *factorization* of x.
752 Let (u, v) be a factorization for a word x. Then if w is a non-empty word such
753 that both of the following hold
755 - either w is a suffix of u or u is a suffix of w
756 - either w is a prefix of v or v is a prefix of w
758 then w is said to be a *repetition* for the factorization (u, v).
760 Just to unpack this, there are four possibilities here. Let w = "abc". Then we
763 - w is a suffix of u and w is a prefix of v. ex: ("lolabc", "abcde")
764 - w is a suffix of u and v is a prefix of w. ex: ("lolabc", "ab")
765 - u is a suffix of w and w is a prefix of v. ex: ("bc", "abchi")
766 - u is a suffix of w and v is a prefix of w. ex: ("bc", "a")
768 Note that the word vu is a repetition for any factorization (u,v) of x = uv,
769 so every factorization has at least one repetition.
771 If x is a string and (u, v) is a factorization for x, then a *local period* for
772 (u, v) is an integer r such that there is some word w such that |w| = r and w is
773 a repetition for (u, v).
775 We denote by local_period(u, v) the smallest local period of (u, v). We sometimes
776 call this *the local period* of (u, v). Provided that x = uv is non-empty, this
777 is well-defined (because each non-empty word has at least one factorization, as
780 It can be proven that the following is an equivalent definition of a local period
781 for a factorization (u, v): any positive integer r such that x[i] == x[i+r] for
782 all i such that |u| - r <= i <= |u| - 1 and such that both x[i] and x[i+r] are
783 defined. (i.e. i > 0 and i + r < |x|).
785 Using the above reformulation, it is easy to prove that
787 1 <= local_period(u, v) <= period(uv)
789 A factorization (u, v) of x such that local_period(u,v) = period(x) is called a
790 *critical factorization*.
792 The algorithm hinges on the following theorem, which is stated without proof:
794 **Critical Factorization Theorem** Any word x has at least one critical
795 factorization (u, v) such that |u| < period(x).
797 The purpose of maximal_suffix is to find such a critical factorization.
800 impl TwoWaySearcher {
802 fn new(needle: &[u8]) -> TwoWaySearcher {
803 let (crit_pos_false, period_false) = TwoWaySearcher::maximal_suffix(needle, false);
804 let (crit_pos_true, period_true) = TwoWaySearcher::maximal_suffix(needle, true);
806 let (crit_pos, period) =
807 if crit_pos_false > crit_pos_true {
808 (crit_pos_false, period_false)
810 (crit_pos_true, period_true)
813 // This isn't in the original algorithm, as far as I'm aware.
814 let byteset = needle.iter()
815 .fold(0, |a, &b| (1 << ((b & 0x3f) as usize)) | a);
817 // A particularly readable explanation of what's going on here can be found
818 // in Crochemore and Rytter's book "Text Algorithms", ch 13. Specifically
819 // see the code for "Algorithm CP" on p. 323.
821 // What's going on is we have some critical factorization (u, v) of the
822 // needle, and we want to determine whether u is a suffix of
823 // &v[..period]. If it is, we use "Algorithm CP1". Otherwise we use
824 // "Algorithm CP2", which is optimized for when the period of the needle
826 if &needle[..crit_pos] == &needle[period.. period + crit_pos] {
838 period: cmp::max(crit_pos, needle.len() - crit_pos) + 1,
842 memory: usize::MAX // Dummy value to signify that the period is long
847 // One of the main ideas of Two-Way is that we factorize the needle into
848 // two halves, (u, v), and begin trying to find v in the haystack by scanning
849 // left to right. If v matches, we try to match u by scanning right to left.
850 // How far we can jump when we encounter a mismatch is all based on the fact
851 // that (u, v) is a critical factorization for the needle.
853 fn next(&mut self, haystack: &[u8], needle: &[u8], long_period: bool)
854 -> Option<(usize, usize)> {
856 // Check that we have room to search in
857 if self.position + needle.len() > haystack.len() {
861 // Quickly skip by large portions unrelated to our substring
863 ((haystack[self.position + needle.len() - 1] & 0x3f)
864 as usize)) & 1 == 0 {
865 self.position += needle.len();
872 // See if the right part of the needle matches
873 let start = if long_period { self.crit_pos }
874 else { cmp::max(self.crit_pos, self.memory) };
875 for i in start..needle.len() {
876 if needle[i] != haystack[self.position + i] {
877 self.position += i - self.crit_pos + 1;
885 // See if the left part of the needle matches
886 let start = if long_period { 0 } else { self.memory };
887 for i in (start..self.crit_pos).rev() {
888 if needle[i] != haystack[self.position + i] {
889 self.position += self.period;
891 self.memory = needle.len() - self.period;
897 // We have found a match!
898 let match_pos = self.position;
899 self.position += needle.len(); // add self.period for all matches
901 self.memory = 0; // set to needle.len() - self.period for all matches
903 return Some((match_pos, match_pos + needle.len()));
907 // Computes a critical factorization (u, v) of `arr`.
908 // Specifically, returns (i, p), where i is the starting index of v in some
909 // critical factorization (u, v) and p = period(v)
912 fn maximal_suffix(arr: &[u8], reversed: bool) -> (usize, usize) {
913 use num::wrapping::WrappingOps;
914 let mut left = -1; // Corresponds to i in the paper
915 let mut right = 0; // Corresponds to j in the paper
916 let mut offset = 1; // Corresponds to k in the paper
917 let mut period = 1; // Corresponds to p in the paper
919 while right + offset < arr.len() {
923 a = arr[left.wrapping_add(offset)];
924 b = arr[right + offset];
926 a = arr[right + offset];
927 b = arr[left.wrapping_add(offset)];
930 // Suffix is smaller, period is entire prefix so far.
933 period = right.wrapping_sub(left);
935 // Advance through repetition of the current period.
936 if offset == period {
943 // Suffix is larger, start over from current location.
950 (left.wrapping_add(1), period)
954 /// The internal state of an iterator that searches for matches of a substring
955 /// within a larger string using a dynamically chosen search algorithm
957 // NB: This is kept around for convenience because
958 // it is planned to be used again in the future
960 TwoWay(TwoWaySearcher),
961 TwoWayLong(TwoWaySearcher),
966 fn new(haystack: &[u8], needle: &[u8]) -> OldSearcher {
967 if needle.len() == 0 {
971 // FIXME(#16715): This unsigned integer addition will probably not
972 // overflow because that would mean that the memory almost solely
973 // consists of the needle. Needs #16715 to be formally fixed.
974 } else if needle.len() + 20 > haystack.len() {
975 // Use naive searcher
978 let searcher = TwoWaySearcher::new(needle);
979 if searcher.memory == usize::MAX { // If the period is long
989 // NB: This is kept around for convenience because
990 // it is planned to be used again in the future
991 struct OldMatchIndices<'a, 'b> {
995 searcher: OldSearcher
998 // FIXME: #21637 Prevents a Clone impl
999 /// An iterator over the start and end indices of the matches of a
1000 /// substring within a larger string
1001 #[unstable(feature = "core", reason = "type may be removed")]
1002 pub struct MatchIndices<'a, P: Pattern<'a>>(P::Searcher);
1004 #[stable(feature = "rust1", since = "1.0.0")]
1005 impl<'a, P: Pattern<'a>> Iterator for MatchIndices<'a, P> {
1006 type Item = (usize, usize);
1009 fn next(&mut self) -> Option<(usize, usize)> {
1014 /// An iterator over the substrings of a string separated by a given
1016 #[unstable(feature = "core")]
1017 #[deprecated(since = "1.0.0", reason = "use `Split` with a `&str`")]
1018 pub struct SplitStr<'a, P: Pattern<'a>>(Split<'a, P>);
1019 #[allow(deprecated)]
1020 impl<'a, P: Pattern<'a>> Iterator for SplitStr<'a, P> {
1021 type Item = &'a str;
1024 #[allow(deprecated)]
1025 fn next(&mut self) -> Option<&'a str> {
1026 Iterator::next(&mut self.0)
1030 impl<'a, 'b> OldMatchIndices<'a, 'b> {
1033 fn next(&mut self) -> Option<(usize, usize)> {
1034 match self.searcher {
1035 TwoWay(ref mut searcher)
1036 => searcher.next(self.haystack.as_bytes(), self.needle.as_bytes(), false),
1037 TwoWayLong(ref mut searcher)
1038 => searcher.next(self.haystack.as_bytes(), self.needle.as_bytes(), true),
1044 Section: Comparing strings
1047 // share the implementation of the lang-item vs. non-lang-item
1049 /// NOTE: This function is (ab)used in rustc::middle::trans::_match
1050 /// to compare &[u8] byte slices that are not necessarily valid UTF-8.
1052 fn eq_slice_(a: &str, b: &str) -> bool {
1053 // NOTE: In theory n should be libc::size_t and not usize, but libc is not available here
1054 #[allow(improper_ctypes)]
1055 extern { fn memcmp(s1: *const i8, s2: *const i8, n: usize) -> i32; }
1056 a.len() == b.len() && unsafe {
1057 memcmp(a.as_ptr() as *const i8,
1058 b.as_ptr() as *const i8,
1063 /// Bytewise slice equality
1064 /// NOTE: This function is (ab)used in rustc::middle::trans::_match
1065 /// to compare &[u8] byte slices that are not necessarily valid UTF-8.
1068 fn eq_slice(a: &str, b: &str) -> bool {
1076 /// Walk through `iter` checking that it's a valid UTF-8 sequence,
1077 /// returning `true` in that case, or, if it is invalid, `false` with
1078 /// `iter` reset such that it is pointing at the first byte in the
1079 /// invalid sequence.
1081 fn run_utf8_validation_iterator(iter: &mut slice::Iter<u8>)
1082 -> Result<(), Utf8Error> {
1083 let whole = iter.as_slice();
1085 // save the current thing we're pointing at.
1086 let old = iter.clone();
1088 // restore the iterator we had at the start of this codepoint.
1089 macro_rules! err { () => {{
1090 *iter = old.clone();
1091 return Err(Utf8Error::InvalidByte(whole.len() - iter.as_slice().len()))
1094 macro_rules! next { () => {
1097 // we needed data, but there was none: error!
1098 None => return Err(Utf8Error::TooShort),
1102 let first = match iter.next() {
1104 // we're at the end of the iterator and a codepoint
1105 // boundary at the same time, so this string is valid.
1106 None => return Ok(())
1109 // ASCII characters are always valid, so only large
1110 // bytes need more examination.
1112 let w = UTF8_CHAR_WIDTH[first as usize];
1113 let second = next!();
1114 // 2-byte encoding is for codepoints \u{0080} to \u{07ff}
1115 // first C2 80 last DF BF
1116 // 3-byte encoding is for codepoints \u{0800} to \u{ffff}
1117 // first E0 A0 80 last EF BF BF
1118 // excluding surrogates codepoints \u{d800} to \u{dfff}
1119 // ED A0 80 to ED BF BF
1120 // 4-byte encoding is for codepoints \u{1000}0 to \u{10ff}ff
1121 // first F0 90 80 80 last F4 8F BF BF
1123 // Use the UTF-8 syntax from the RFC
1125 // https://tools.ietf.org/html/rfc3629
1127 // UTF8-2 = %xC2-DF UTF8-tail
1128 // UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
1129 // %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
1130 // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
1131 // %xF4 %x80-8F 2( UTF8-tail )
1133 2 => if second & !CONT_MASK != TAG_CONT_U8 {err!()},
1135 match (first, second, next!() & !CONT_MASK) {
1136 (0xE0 , 0xA0 ... 0xBF, TAG_CONT_U8) |
1137 (0xE1 ... 0xEC, 0x80 ... 0xBF, TAG_CONT_U8) |
1138 (0xED , 0x80 ... 0x9F, TAG_CONT_U8) |
1139 (0xEE ... 0xEF, 0x80 ... 0xBF, TAG_CONT_U8) => {}
1144 match (first, second, next!() & !CONT_MASK, next!() & !CONT_MASK) {
1145 (0xF0 , 0x90 ... 0xBF, TAG_CONT_U8, TAG_CONT_U8) |
1146 (0xF1 ... 0xF3, 0x80 ... 0xBF, TAG_CONT_U8, TAG_CONT_U8) |
1147 (0xF4 , 0x80 ... 0x8F, TAG_CONT_U8, TAG_CONT_U8) => {}
1157 // https://tools.ietf.org/html/rfc3629
1158 static UTF8_CHAR_WIDTH: [u8; 256] = [
1159 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1160 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
1161 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1162 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
1163 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1164 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
1165 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1166 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
1167 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1168 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
1169 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1170 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
1171 0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
1172 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
1173 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
1174 4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
1177 /// Struct that contains a `char` and the index of the first byte of
1178 /// the next `char` in a string. This can be used as a data structure
1179 /// for iterating over the UTF-8 bytes of a string.
1181 #[unstable(feature = "str_char",
1182 reason = "existence of this struct is uncertain as it is frequently \
1183 able to be replaced with char.len_utf8() and/or \
1184 char/char_indices iterators")]
1185 pub struct CharRange {
1188 /// Index of the first byte of the next `char`
1192 /// Mask of the value bits of a continuation byte
1193 const CONT_MASK: u8 = 0b0011_1111;
1194 /// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte
1195 const TAG_CONT_U8: u8 = 0b1000_0000;
1198 Section: Trait implementations
1202 use cmp::{Ordering, Ord, PartialEq, PartialOrd, Eq};
1203 use cmp::Ordering::{Less, Equal, Greater};
1204 use iter::IteratorExt;
1206 use option::Option::Some;
1208 use str::{StrExt, eq_slice};
1210 #[stable(feature = "rust1", since = "1.0.0")]
1213 fn cmp(&self, other: &str) -> Ordering {
1214 for (s_b, o_b) in self.bytes().zip(other.bytes()) {
1215 match s_b.cmp(&o_b) {
1216 Greater => return Greater,
1217 Less => return Less,
1222 self.len().cmp(&other.len())
1226 #[stable(feature = "rust1", since = "1.0.0")]
1227 impl PartialEq for str {
1229 fn eq(&self, other: &str) -> bool {
1230 eq_slice(self, other)
1233 fn ne(&self, other: &str) -> bool { !(*self).eq(other) }
1236 #[stable(feature = "rust1", since = "1.0.0")]
1239 #[stable(feature = "rust1", since = "1.0.0")]
1240 impl PartialOrd for str {
1242 fn partial_cmp(&self, other: &str) -> Option<Ordering> {
1243 Some(self.cmp(other))
1247 /// Returns a slice of the given string from the byte range
1248 /// [`begin`..`end`).
1250 /// This operation is `O(1)`.
1252 /// Panics when `begin` and `end` do not point to valid characters
1253 /// or point beyond the last character of the string.
1258 /// let s = "Löwe 老虎 Léopard";
1259 /// assert_eq!(&s[0 .. 1], "L");
1261 /// assert_eq!(&s[1 .. 9], "öwe 老");
1263 /// // these will panic:
1264 /// // byte 2 lies within `ö`:
1267 /// // byte 8 lies within `老`
1270 /// // byte 100 is outside the string
1271 /// // &s[3 .. 100];
1273 #[stable(feature = "rust1", since = "1.0.0")]
1274 impl ops::Index<ops::Range<usize>> for str {
1277 fn index(&self, index: &ops::Range<usize>) -> &str {
1278 // is_char_boundary checks that the index is in [0, .len()]
1279 if index.start <= index.end &&
1280 self.is_char_boundary(index.start) &&
1281 self.is_char_boundary(index.end) {
1282 unsafe { self.slice_unchecked(index.start, index.end) }
1284 super::slice_error_fail(self, index.start, index.end)
1289 /// Returns a slice of the string from the beginning to byte
1292 /// Equivalent to `self[0 .. end]`.
1294 /// Panics when `end` does not point to a valid character, or is
1296 #[stable(feature = "rust1", since = "1.0.0")]
1297 impl ops::Index<ops::RangeTo<usize>> for str {
1300 fn index(&self, index: &ops::RangeTo<usize>) -> &str {
1301 // is_char_boundary checks that the index is in [0, .len()]
1302 if self.is_char_boundary(index.end) {
1303 unsafe { self.slice_unchecked(0, index.end) }
1305 super::slice_error_fail(self, 0, index.end)
1310 /// Returns a slice of the string from `begin` to its end.
1312 /// Equivalent to `self[begin .. self.len()]`.
1314 /// Panics when `begin` does not point to a valid character, or is
1316 #[stable(feature = "rust1", since = "1.0.0")]
1317 impl ops::Index<ops::RangeFrom<usize>> for str {
1320 fn index(&self, index: &ops::RangeFrom<usize>) -> &str {
1321 // is_char_boundary checks that the index is in [0, .len()]
1322 if self.is_char_boundary(index.start) {
1323 unsafe { self.slice_unchecked(index.start, self.len()) }
1325 super::slice_error_fail(self, index.start, self.len())
1330 #[stable(feature = "rust1", since = "1.0.0")]
1331 impl ops::Index<ops::RangeFull> for str {
1334 fn index(&self, _index: &ops::RangeFull) -> &str {
1340 /// Any string that can be represented as a slice
1341 #[unstable(feature = "core",
1342 reason = "Instead of taking this bound generically, this trait will be \
1343 replaced with one of slicing syntax (&foo[..]), deref coercions, or \
1344 a more generic conversion trait")]
1346 /// Work with `self` as a slice.
1347 fn as_slice<'a>(&'a self) -> &'a str;
1352 fn as_slice<'a>(&'a self) -> &'a str { self }
1355 impl<'a, S: ?Sized> Str for &'a S where S: Str {
1357 fn as_slice(&self) -> &str { Str::as_slice(*self) }
1360 /// Return type of `StrExt::split`
1361 #[stable(feature = "rust1", since = "1.0.0")]
1362 pub struct Split<'a, P: Pattern<'a>>(CharSplits<'a, P>);
1363 #[stable(feature = "rust1", since = "1.0.0")]
1364 impl<'a, P: Pattern<'a>> Iterator for Split<'a, P> {
1365 type Item = &'a str;
1368 fn next(&mut self) -> Option<&'a str> {
1372 #[stable(feature = "rust1", since = "1.0.0")]
1373 impl<'a, P: Pattern<'a>> DoubleEndedIterator for Split<'a, P>
1374 where P::Searcher: DoubleEndedSearcher<'a> {
1376 fn next_back(&mut self) -> Option<&'a str> {
1381 /// Return type of `StrExt::split_terminator`
1382 #[stable(feature = "rust1", since = "1.0.0")]
1383 pub struct SplitTerminator<'a, P: Pattern<'a>>(CharSplits<'a, P>);
1384 delegate_iter!{pattern &'a str : SplitTerminator<'a, P>}
1386 /// Return type of `StrExt::splitn`
1387 #[stable(feature = "rust1", since = "1.0.0")]
1388 pub struct SplitN<'a, P: Pattern<'a>>(CharSplitsN<'a, P>);
1389 delegate_iter!{pattern forward &'a str : SplitN<'a, P>}
1391 /// Return type of `StrExt::rsplit`
1392 #[stable(feature = "rust1", since = "1.0.0")]
1393 pub struct RSplit<'a, P: Pattern<'a>>(RCharSplits<'a, P>);
1394 delegate_iter!{pattern reverse &'a str : RSplit<'a, P>}
1396 /// Return type of `StrExt::rsplitn`
1397 #[stable(feature = "rust1", since = "1.0.0")]
1398 pub struct RSplitN<'a, P: Pattern<'a>>(CharSplitsN<'a, P>);
1399 delegate_iter!{pattern forward &'a str : RSplitN<'a, P>}
1401 /// Methods for string slices
1402 #[allow(missing_docs)]
1404 // NB there are no docs here are they're all located on the StrExt trait in
1405 // libcollections, not here.
1407 fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
1408 fn contains_char<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
1409 fn chars<'a>(&'a self) -> Chars<'a>;
1410 fn bytes<'a>(&'a self) -> Bytes<'a>;
1411 fn char_indices<'a>(&'a self) -> CharIndices<'a>;
1412 fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P>;
1413 fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P>;
1414 fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P>;
1415 fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P>
1416 where P::Searcher: ReverseSearcher<'a>;
1417 fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>;
1418 fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P>;
1419 #[allow(deprecated) /* for SplitStr */]
1420 fn split_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitStr<'a, P>;
1421 fn lines<'a>(&'a self) -> Lines<'a>;
1422 fn lines_any<'a>(&'a self) -> LinesAny<'a>;
1423 fn char_len(&self) -> usize;
1424 fn slice_chars<'a>(&'a self, begin: usize, end: usize) -> &'a str;
1425 unsafe fn slice_unchecked<'a>(&'a self, begin: usize, end: usize) -> &'a str;
1426 fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
1427 fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool
1428 where P::Searcher: ReverseSearcher<'a>;
1429 fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
1430 where P::Searcher: DoubleEndedSearcher<'a>;
1431 fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str;
1432 fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
1433 where P::Searcher: ReverseSearcher<'a>;
1434 fn is_char_boundary(&self, index: usize) -> bool;
1435 fn char_range_at(&self, start: usize) -> CharRange;
1436 fn char_range_at_reverse(&self, start: usize) -> CharRange;
1437 fn char_at(&self, i: usize) -> char;
1438 fn char_at_reverse(&self, i: usize) -> char;
1439 fn as_bytes<'a>(&'a self) -> &'a [u8];
1440 fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>;
1441 fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>
1442 where P::Searcher: ReverseSearcher<'a>;
1443 fn find_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>;
1444 fn slice_shift_char<'a>(&'a self) -> Option<(char, &'a str)>;
1445 fn subslice_offset(&self, inner: &str) -> usize;
1446 fn as_ptr(&self) -> *const u8;
1447 fn len(&self) -> usize;
1448 fn is_empty(&self) -> bool;
1449 fn parse<T: FromStr>(&self) -> Result<T, T::Err>;
1453 fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
1454 assert!(begin <= end);
1455 panic!("index {} and/or {} in `{}` do not lie on character boundary",
1459 impl StrExt for str {
1461 fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
1462 pat.is_contained_in(self)
1466 fn contains_char<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
1467 pat.is_contained_in(self)
1471 fn chars(&self) -> Chars {
1472 Chars{iter: self.as_bytes().iter()}
1476 fn bytes(&self) -> Bytes {
1477 Bytes(self.as_bytes().iter().map(BytesDeref))
1481 fn char_indices(&self) -> CharIndices {
1482 CharIndices { front_offset: 0, iter: self.chars() }
1486 fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> {
1490 matcher: pat.into_searcher(self),
1491 allow_trailing_empty: true,
1497 fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> {
1498 SplitN(CharSplitsN {
1499 iter: self.split(pat).0,
1506 fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> {
1507 SplitTerminator(CharSplits {
1508 allow_trailing_empty: false,
1514 fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P>
1515 where P::Searcher: ReverseSearcher<'a>
1517 RSplit(RCharSplits {
1520 matcher: pat.into_searcher(self),
1521 allow_final_empty: true,
1527 fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P> {
1528 RSplitN(CharSplitsN {
1529 iter: self.split(pat).0,
1536 fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> {
1537 MatchIndices(pat.into_searcher(self))
1541 #[allow(deprecated) /* for SplitStr */ ]
1542 fn split_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitStr<'a, P> {
1543 SplitStr(self.split(pat))
1547 fn lines(&self) -> Lines {
1548 Lines { inner: self.split_terminator('\n').0 }
1551 fn lines_any(&self) -> LinesAny {
1552 fn f(line: &str) -> &str {
1554 if l > 0 && line.as_bytes()[l - 1] == b'\r' { &line[0 .. l - 1] }
1558 let f: fn(&str) -> &str = f; // coerce to fn pointer
1559 LinesAny { inner: self.lines().map(f) }
1563 fn char_len(&self) -> usize { self.chars().count() }
1565 fn slice_chars(&self, begin: usize, end: usize) -> &str {
1566 assert!(begin <= end);
1568 let mut begin_byte = None;
1569 let mut end_byte = None;
1571 // This could be even more efficient by not decoding,
1572 // only finding the char boundaries
1573 for (idx, _) in self.char_indices() {
1574 if count == begin { begin_byte = Some(idx); }
1575 if count == end { end_byte = Some(idx); break; }
1578 if begin_byte.is_none() && count == begin { begin_byte = Some(self.len()) }
1579 if end_byte.is_none() && count == end { end_byte = Some(self.len()) }
1581 match (begin_byte, end_byte) {
1582 (None, _) => panic!("slice_chars: `begin` is beyond end of string"),
1583 (_, None) => panic!("slice_chars: `end` is beyond end of string"),
1584 (Some(a), Some(b)) => unsafe { self.slice_unchecked(a, b) }
1589 unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str {
1590 mem::transmute(Slice {
1591 data: self.as_ptr().offset(begin as int),
1597 fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
1598 pat.is_prefix_of(self)
1602 fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool
1603 where P::Searcher: ReverseSearcher<'a>
1605 pat.is_suffix_of(self)
1609 fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
1610 where P::Searcher: DoubleEndedSearcher<'a>
1614 let mut matcher = pat.into_searcher(self);
1615 if let Some((a, b)) = matcher.next_reject() {
1617 j = b; // Rember earliest known match, correct it below if
1618 // last match is different
1620 if let Some((_, b)) = matcher.next_reject_back() {
1624 // Searcher is known to return valid indices
1625 self.slice_unchecked(i, j)
1630 fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
1631 let mut i = self.len();
1632 let mut matcher = pat.into_searcher(self);
1633 if let Some((a, _)) = matcher.next_reject() {
1637 // Searcher is known to return valid indices
1638 self.slice_unchecked(i, self.len())
1643 fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
1644 where P::Searcher: ReverseSearcher<'a>
1647 let mut matcher = pat.into_searcher(self);
1648 if let Some((_, b)) = matcher.next_reject_back() {
1652 // Searcher is known to return valid indices
1653 self.slice_unchecked(0, j)
1658 fn is_char_boundary(&self, index: usize) -> bool {
1659 if index == self.len() { return true; }
1660 match self.as_bytes().get(index) {
1662 Some(&b) => b < 128 || b >= 192,
1667 fn char_range_at(&self, i: usize) -> CharRange {
1668 let (c, n) = char_range_at_raw(self.as_bytes(), i);
1669 CharRange { ch: unsafe { mem::transmute(c) }, next: n }
1673 fn char_range_at_reverse(&self, start: usize) -> CharRange {
1674 let mut prev = start;
1676 prev = prev.saturating_sub(1);
1677 if self.as_bytes()[prev] < 128 {
1678 return CharRange{ch: self.as_bytes()[prev] as char, next: prev}
1681 // Multibyte case is a fn to allow char_range_at_reverse to inline cleanly
1682 fn multibyte_char_range_at_reverse(s: &str, mut i: usize) -> CharRange {
1683 // while there is a previous byte == 10......
1684 while i > 0 && s.as_bytes()[i] & !CONT_MASK == TAG_CONT_U8 {
1688 let first= s.as_bytes()[i];
1689 let w = UTF8_CHAR_WIDTH[first as usize];
1692 let mut val = utf8_first_byte(first, w as u32);
1693 val = utf8_acc_cont_byte(val, s.as_bytes()[i + 1]);
1694 if w > 2 { val = utf8_acc_cont_byte(val, s.as_bytes()[i + 2]); }
1695 if w > 3 { val = utf8_acc_cont_byte(val, s.as_bytes()[i + 3]); }
1697 return CharRange {ch: unsafe { mem::transmute(val) }, next: i};
1700 return multibyte_char_range_at_reverse(self, prev);
1704 fn char_at(&self, i: usize) -> char {
1705 self.char_range_at(i).ch
1709 fn char_at_reverse(&self, i: usize) -> char {
1710 self.char_range_at_reverse(i).ch
1714 fn as_bytes(&self) -> &[u8] {
1715 unsafe { mem::transmute(self) }
1718 fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
1719 pat.into_searcher(self).next_match().map(|(i, _)| i)
1722 fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>
1723 where P::Searcher: ReverseSearcher<'a>
1725 pat.into_searcher(self).next_match_back().map(|(i, _)| i)
1728 fn find_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
1733 fn slice_shift_char(&self) -> Option<(char, &str)> {
1734 if self.is_empty() {
1737 let ch = self.char_at(0);
1738 let next_s = unsafe { self.slice_unchecked(ch.len_utf8(), self.len()) };
1743 fn subslice_offset(&self, inner: &str) -> usize {
1744 let a_start = self.as_ptr() as usize;
1745 let a_end = a_start + self.len();
1746 let b_start = inner.as_ptr() as usize;
1747 let b_end = b_start + inner.len();
1749 assert!(a_start <= b_start);
1750 assert!(b_end <= a_end);
1755 fn as_ptr(&self) -> *const u8 {
1760 fn len(&self) -> usize { self.repr().len }
1763 fn is_empty(&self) -> bool { self.len() == 0 }
1766 fn parse<T: FromStr>(&self) -> Result<T, T::Err> { FromStr::from_str(self) }
1769 /// Pluck a code point out of a UTF-8-like byte slice and return the
1770 /// index of the next code point.
1772 #[unstable(feature = "core")]
1773 pub fn char_range_at_raw(bytes: &[u8], i: usize) -> (u32, usize) {
1775 return (bytes[i] as u32, i + 1);
1778 // Multibyte case is a fn to allow char_range_at to inline cleanly
1779 fn multibyte_char_range_at(bytes: &[u8], i: usize) -> (u32, usize) {
1780 let first = bytes[i];
1781 let w = UTF8_CHAR_WIDTH[first as usize];
1784 let mut val = utf8_first_byte(first, w as u32);
1785 val = utf8_acc_cont_byte(val, bytes[i + 1]);
1786 if w > 2 { val = utf8_acc_cont_byte(val, bytes[i + 2]); }
1787 if w > 3 { val = utf8_acc_cont_byte(val, bytes[i + 3]); }
1789 return (val, i + w as usize);
1792 multibyte_char_range_at(bytes, i)
1795 #[stable(feature = "rust1", since = "1.0.0")]
1796 impl<'a> Default for &'a str {
1797 #[stable(feature = "rust1", since = "1.0.0")]
1798 fn default() -> &'a str { "" }
1801 #[stable(feature = "rust1", since = "1.0.0")]
1802 impl<'a> Iterator for Lines<'a> {
1803 type Item = &'a str;
1806 fn next(&mut self) -> Option<&'a str> { self.inner.next() }
1808 fn size_hint(&self) -> (usize, Option<usize>) { self.inner.size_hint() }
1811 #[stable(feature = "rust1", since = "1.0.0")]
1812 impl<'a> DoubleEndedIterator for Lines<'a> {
1814 fn next_back(&mut self) -> Option<&'a str> { self.inner.next_back() }
1817 #[stable(feature = "rust1", since = "1.0.0")]
1818 impl<'a> Iterator for LinesAny<'a> {
1819 type Item = &'a str;
1822 fn next(&mut self) -> Option<&'a str> { self.inner.next() }
1824 fn size_hint(&self) -> (usize, Option<usize>) { self.inner.size_hint() }
1827 #[stable(feature = "rust1", since = "1.0.0")]
1828 impl<'a> DoubleEndedIterator for LinesAny<'a> {
1830 fn next_back(&mut self) -> Option<&'a str> { self.inner.next_back() }