1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! String manipulation
13 //! For more details, see std::str
15 #![stable(feature = "rust1", since = "1.0.0")]
17 use self::pattern::Pattern;
18 use self::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher};
22 use iter::{Map, Cloned, FusedIterator};
28 /// A trait to abstract the idea of creating a new instance of a type from a
31 /// `FromStr`'s [`from_str()`] method is often used implicitly, through
32 /// [`str`]'s [`parse()`] method. See [`parse()`]'s documentation for examples.
34 /// [`from_str()`]: #tymethod.from_str
35 /// [`str`]: ../../std/primitive.str.html
36 /// [`parse()`]: ../../std/primitive.str.html#method.parse
37 #[stable(feature = "rust1", since = "1.0.0")]
38 pub trait FromStr: Sized {
39 /// The associated error which can be returned from parsing.
40 #[stable(feature = "rust1", since = "1.0.0")]
43 /// Parses a string `s` to return a value of this type.
45 /// If parsing succeeds, return the value inside `Ok`, otherwise
46 /// when the string is ill-formatted return an error
47 /// inside `Err`. The error type is specific to the implementation of the trait.
51 /// Basic usage with [`i32`][ithirtytwo], a type that implements `FromStr`:
53 /// [ithirtytwo]: ../../std/primitive.i32.html
56 /// use std::str::FromStr;
59 /// let x = i32::from_str(s).unwrap();
63 #[stable(feature = "rust1", since = "1.0.0")]
64 fn from_str(s: &str) -> Result<Self, Self::Err>;
67 #[stable(feature = "rust1", since = "1.0.0")]
68 impl FromStr for bool {
69 type Err = ParseBoolError;
71 /// Parse a `bool` from a string.
73 /// Yields a `Result<bool, ParseBoolError>`, because `s` may or may not
74 /// actually be parseable.
79 /// use std::str::FromStr;
81 /// assert_eq!(FromStr::from_str("true"), Ok(true));
82 /// assert_eq!(FromStr::from_str("false"), Ok(false));
83 /// assert!(<bool as FromStr>::from_str("not even a boolean").is_err());
86 /// Note, in many cases, the `.parse()` method on `str` is more proper.
89 /// assert_eq!("true".parse(), Ok(true));
90 /// assert_eq!("false".parse(), Ok(false));
91 /// assert!("not even a boolean".parse::<bool>().is_err());
94 fn from_str(s: &str) -> Result<bool, ParseBoolError> {
98 _ => Err(ParseBoolError { _priv: () }),
103 /// An error returned when parsing a `bool` from a string fails.
104 #[derive(Debug, Clone, PartialEq, Eq)]
105 #[stable(feature = "rust1", since = "1.0.0")]
106 pub struct ParseBoolError { _priv: () }
108 #[stable(feature = "rust1", since = "1.0.0")]
109 impl fmt::Display for ParseBoolError {
110 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
111 "provided string was not `true` or `false`".fmt(f)
116 Section: Creating a string
119 /// Errors which can occur when attempting to interpret a sequence of `u8`
122 /// As such, the `from_utf8` family of functions and methods for both `String`s
123 /// and `&str`s make use of this error, for example.
124 #[derive(Copy, Eq, PartialEq, Clone, Debug)]
125 #[stable(feature = "rust1", since = "1.0.0")]
126 pub struct Utf8Error {
131 /// Returns the index in the given string up to which valid UTF-8 was
134 /// It is the maximum index such that `from_utf8(input[..index])`
135 /// would return `Ok(_)`.
144 /// // some invalid bytes, in a vector
145 /// let sparkle_heart = vec![0, 159, 146, 150];
147 /// // std::str::from_utf8 returns a Utf8Error
148 /// let error = str::from_utf8(&sparkle_heart).unwrap_err();
150 /// // the second byte is invalid here
151 /// assert_eq!(1, error.valid_up_to());
153 #[stable(feature = "utf8_error", since = "1.5.0")]
154 pub fn valid_up_to(&self) -> usize { self.valid_up_to }
157 /// Converts a slice of bytes to a string slice.
159 /// A string slice (`&str`) is made of bytes (`u8`), and a byte slice (`&[u8]`)
160 /// is made of bytes, so this function converts between the two. Not all byte
161 /// slices are valid string slices, however: `&str` requires that it is valid
162 /// UTF-8. `from_utf8()` checks to ensure that the bytes are valid UTF-8, and
163 /// then does the conversion.
165 /// If you are sure that the byte slice is valid UTF-8, and you don't want to
166 /// incur the overhead of the validity check, there is an unsafe version of
167 /// this function, [`from_utf8_unchecked()`][fromutf8u], which has the same
168 /// behavior but skips the check.
170 /// [fromutf8u]: fn.from_utf8_unchecked.html
172 /// If you need a `String` instead of a `&str`, consider
173 /// [`String::from_utf8()`][string].
175 /// [string]: ../../std/string/struct.String.html#method.from_utf8
177 /// Because you can stack-allocate a `[u8; N]`, and you can take a `&[u8]` of
178 /// it, this function is one way to have a stack-allocated string. There is
179 /// an example of this in the examples section below.
183 /// Returns `Err` if the slice is not UTF-8 with a description as to why the
184 /// provided slice is not UTF-8.
193 /// // some bytes, in a vector
194 /// let sparkle_heart = vec![240, 159, 146, 150];
196 /// // We know these bytes are valid, so just use `unwrap()`.
197 /// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
199 /// assert_eq!("💖", sparkle_heart);
207 /// // some invalid bytes, in a vector
208 /// let sparkle_heart = vec![0, 159, 146, 150];
210 /// assert!(str::from_utf8(&sparkle_heart).is_err());
213 /// See the docs for [`Utf8Error`][error] for more details on the kinds of
214 /// errors that can be returned.
216 /// [error]: struct.Utf8Error.html
218 /// A "stack allocated string":
223 /// // some bytes, in a stack-allocated array
224 /// let sparkle_heart = [240, 159, 146, 150];
226 /// // We know these bytes are valid, so just use `unwrap()`.
227 /// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
229 /// assert_eq!("💖", sparkle_heart);
231 #[stable(feature = "rust1", since = "1.0.0")]
232 pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
233 run_utf8_validation(v)?;
234 Ok(unsafe { from_utf8_unchecked(v) })
/// Forms a mutable str from a pointer and a length.
///
/// The `len` argument is the number of bytes in the string. This performs
/// the same functionality as `from_raw_parts`, except that a mutable str is
/// returned.
///
/// # Safety
///
/// This function is unsafe as there is no guarantee that the given pointer is
/// valid for `len` bytes, nor whether the lifetime inferred is a suitable
/// lifetime for the returned str.
///
/// The data must be valid UTF-8.
///
/// `p` must be non-null, even for zero-length str.
///
/// # Caveat
///
/// The lifetime for the returned str is inferred from its usage. To
/// prevent accidental misuse, it's suggested to tie the lifetime to whichever
/// source lifetime is safe in the context, such as by providing a helper
/// function taking the lifetime of a host value for the str, or by explicit
/// annotation.
unsafe fn from_raw_parts_mut<'a>(p: *mut u8, len: usize) -> &'a mut str {
    // View the raw memory as a byte slice first, then reinterpret that slice
    // as a string slice.
    let bytes: &'a mut [u8] = slice::from_raw_parts_mut(p, len);
    mem::transmute::<&mut [u8], &mut str>(bytes)
}
265 /// Converts a slice of bytes to a string slice without checking
266 /// that the string contains valid UTF-8.
268 /// See the safe version, [`from_utf8()`][fromutf8], for more information.
270 /// [fromutf8]: fn.from_utf8.html
274 /// This function is unsafe because it does not check that the bytes passed to
275 /// it are valid UTF-8. If this constraint is violated, undefined behavior
276 /// results, as the rest of Rust assumes that `&str`s are valid UTF-8.
285 /// // some bytes, in a vector
286 /// let sparkle_heart = vec![240, 159, 146, 150];
288 /// let sparkle_heart = unsafe {
289 /// str::from_utf8_unchecked(&sparkle_heart)
292 /// assert_eq!("💖", sparkle_heart);
295 #[stable(feature = "rust1", since = "1.0.0")]
296 pub unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
300 #[stable(feature = "rust1", since = "1.0.0")]
301 impl fmt::Display for Utf8Error {
302 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
303 write!(f, "invalid utf-8: invalid byte near index {}", self.valid_up_to)
311 /// Iterator for the char (representing *Unicode Scalar Values*) of a string
313 /// Created with the method [`chars()`].
315 /// [`chars()`]: ../../std/primitive.str.html#method.chars
316 #[derive(Clone, Debug)]
317 #[stable(feature = "rust1", since = "1.0.0")]
318 pub struct Chars<'a> {
319 iter: slice::Iter<'a, u8>
/// Computes the starting value of the code point accumulator from the first
/// byte of a multibyte sequence.
///
/// Only the payload bits of the first byte are kept: the bottom 5 bits when
/// `width` is 2, the bottom 4 bits when it is 3, and the bottom 3 bits when
/// it is 4.
fn utf8_first_byte(byte: u8, width: u32) -> u32 {
    let payload_mask = 0x7F_u8 >> width;
    (byte & payload_mask) as u32
}
328 /// Return the value of `ch` updated with continuation byte `byte`.
330 fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 { (ch << 6) | (byte & CONT_MASK) as u32 }
332 /// Checks whether the byte is a UTF-8 continuation byte (i.e. starts with the
335 fn utf8_is_cont_byte(byte: u8) -> bool { (byte & !CONT_MASK) == TAG_CONT_U8 }
338 fn unwrap_or_0(opt: Option<&u8>) -> u8 {
345 /// Reads the next code point out of a byte iterator (assuming a
346 /// UTF-8-like encoding).
347 #[unstable(feature = "str_internals", issue = "0")]
349 pub fn next_code_point<'a, I: Iterator<Item = &'a u8>>(bytes: &mut I) -> Option<u32> {
351 let x = match bytes.next() {
353 Some(&next_byte) if next_byte < 128 => return Some(next_byte as u32),
354 Some(&next_byte) => next_byte,
357 // Multibyte case follows
358 // Decode from a byte combination out of: [[[x y] z] w]
359 // NOTE: Performance is sensitive to the exact formulation here
360 let init = utf8_first_byte(x, 2);
361 let y = unwrap_or_0(bytes.next());
362 let mut ch = utf8_acc_cont_byte(init, y);
365 // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
366 let z = unwrap_or_0(bytes.next());
367 let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
368 ch = init << 12 | y_z;
371 // use only the lower 3 bits of `init`
372 let w = unwrap_or_0(bytes.next());
373 ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
380 /// Reads the last code point out of a byte iterator (assuming a
381 /// UTF-8-like encoding).
383 fn next_code_point_reverse<'a, I>(bytes: &mut I) -> Option<u32>
384 where I: DoubleEndedIterator<Item = &'a u8>,
387 let w = match bytes.next_back() {
389 Some(&next_byte) if next_byte < 128 => return Some(next_byte as u32),
390 Some(&back_byte) => back_byte,
393 // Multibyte case follows
394 // Decode from a byte combination out of: [x [y [z w]]]
396 let z = unwrap_or_0(bytes.next_back());
397 ch = utf8_first_byte(z, 2);
398 if utf8_is_cont_byte(z) {
399 let y = unwrap_or_0(bytes.next_back());
400 ch = utf8_first_byte(y, 3);
401 if utf8_is_cont_byte(y) {
402 let x = unwrap_or_0(bytes.next_back());
403 ch = utf8_first_byte(x, 4);
404 ch = utf8_acc_cont_byte(ch, y);
406 ch = utf8_acc_cont_byte(ch, z);
408 ch = utf8_acc_cont_byte(ch, w);
413 #[stable(feature = "rust1", since = "1.0.0")]
414 impl<'a> Iterator for Chars<'a> {
418 fn next(&mut self) -> Option<char> {
419 next_code_point(&mut self.iter).map(|ch| {
420 // str invariant says `ch` is a valid Unicode Scalar Value
422 char::from_u32_unchecked(ch)
428 fn count(self) -> usize {
429 // length in `char` is equal to the number of non-continuation bytes
430 let bytes_len = self.iter.len();
431 let mut cont_bytes = 0;
432 for &byte in self.iter {
433 cont_bytes += utf8_is_cont_byte(byte) as usize;
435 bytes_len - cont_bytes
439 fn size_hint(&self) -> (usize, Option<usize>) {
440 let len = self.iter.len();
441 // `(len + 3)` can't overflow, because we know that the `slice::Iter`
442 // belongs to a slice in memory which has a maximum length of
443 // `isize::MAX` (that's well below `usize::MAX`).
444 ((len + 3) / 4, Some(len))
448 fn last(mut self) -> Option<char> {
449 // No need to go through the entire string.
454 #[stable(feature = "rust1", since = "1.0.0")]
455 impl<'a> DoubleEndedIterator for Chars<'a> {
457 fn next_back(&mut self) -> Option<char> {
458 next_code_point_reverse(&mut self.iter).map(|ch| {
459 // str invariant says `ch` is a valid Unicode Scalar Value
461 char::from_u32_unchecked(ch)
467 #[unstable(feature = "fused", issue = "35602")]
468 impl<'a> FusedIterator for Chars<'a> {}
471 /// View the underlying data as a subslice of the original data.
473 /// This has the same lifetime as the original slice, and so the
474 /// iterator can continue to be used while this exists.
479 /// let mut chars = "abc".chars();
481 /// assert_eq!(chars.as_str(), "abc");
483 /// assert_eq!(chars.as_str(), "bc");
486 /// assert_eq!(chars.as_str(), "");
488 #[stable(feature = "iter_to_slice", since = "1.4.0")]
490 pub fn as_str(&self) -> &'a str {
491 unsafe { from_utf8_unchecked(self.iter.as_slice()) }
495 /// Iterator for a string's characters and their byte offsets.
496 #[derive(Clone, Debug)]
497 #[stable(feature = "rust1", since = "1.0.0")]
498 pub struct CharIndices<'a> {
503 #[stable(feature = "rust1", since = "1.0.0")]
504 impl<'a> Iterator for CharIndices<'a> {
505 type Item = (usize, char);
508 fn next(&mut self) -> Option<(usize, char)> {
509 let pre_len = self.iter.iter.len();
510 match self.iter.next() {
513 let index = self.front_offset;
514 let len = self.iter.iter.len();
515 self.front_offset += pre_len - len;
522 fn count(self) -> usize {
527 fn size_hint(&self) -> (usize, Option<usize>) {
528 self.iter.size_hint()
532 fn last(mut self) -> Option<(usize, char)> {
533 // No need to go through the entire string.
538 #[stable(feature = "rust1", since = "1.0.0")]
539 impl<'a> DoubleEndedIterator for CharIndices<'a> {
541 fn next_back(&mut self) -> Option<(usize, char)> {
542 match self.iter.next_back() {
545 let index = self.front_offset + self.iter.iter.len();
552 #[unstable(feature = "fused", issue = "35602")]
553 impl<'a> FusedIterator for CharIndices<'a> {}
555 impl<'a> CharIndices<'a> {
556 /// View the underlying data as a subslice of the original data.
558 /// This has the same lifetime as the original slice, and so the
559 /// iterator can continue to be used while this exists.
560 #[stable(feature = "iter_to_slice", since = "1.4.0")]
562 pub fn as_str(&self) -> &'a str {
567 /// External iterator for a string's bytes.
568 /// Use with the `std::iter` module.
570 /// Created with the method [`bytes()`].
572 /// [`bytes()`]: ../../std/primitive.str.html#method.bytes
573 #[stable(feature = "rust1", since = "1.0.0")]
574 #[derive(Clone, Debug)]
575 pub struct Bytes<'a>(Cloned<slice::Iter<'a, u8>>);
577 #[stable(feature = "rust1", since = "1.0.0")]
578 impl<'a> Iterator for Bytes<'a> {
582 fn next(&mut self) -> Option<u8> {
587 fn size_hint(&self) -> (usize, Option<usize>) {
592 fn count(self) -> usize {
597 fn last(self) -> Option<Self::Item> {
602 fn nth(&mut self, n: usize) -> Option<Self::Item> {
607 #[stable(feature = "rust1", since = "1.0.0")]
608 impl<'a> DoubleEndedIterator for Bytes<'a> {
610 fn next_back(&mut self) -> Option<u8> {
615 #[stable(feature = "rust1", since = "1.0.0")]
616 impl<'a> ExactSizeIterator for Bytes<'a> {
618 fn len(&self) -> usize {
623 fn is_empty(&self) -> bool {
628 #[unstable(feature = "fused", issue = "35602")]
629 impl<'a> FusedIterator for Bytes<'a> {}
631 /// This macro generates a Clone impl for string pattern API
632 /// wrapper types of the form X<'a, P>
633 macro_rules! derive_pattern_clone {
634 (clone $t:ident with |$s:ident| $e:expr) => {
635 impl<'a, P: Pattern<'a>> Clone for $t<'a, P>
636 where P::Searcher: Clone
638 fn clone(&self) -> Self {
646 /// This macro generates two public iterator structs
647 /// wrapping a private internal one that makes use of the `Pattern` API.
649 /// For all patterns `P: Pattern<'a>` the following items will be
650 /// generated (generics omitted):
652 /// struct $forward_iterator($internal_iterator);
653 /// struct $reverse_iterator($internal_iterator);
655 /// impl Iterator for $forward_iterator
656 /// { /* internal ends up calling Searcher::next_match() */ }
658 /// impl DoubleEndedIterator for $forward_iterator
659 /// where P::Searcher: DoubleEndedSearcher
660 /// { /* internal ends up calling Searcher::next_match_back() */ }
662 /// impl Iterator for $reverse_iterator
663 /// where P::Searcher: ReverseSearcher
664 /// { /* internal ends up calling Searcher::next_match_back() */ }
666 /// impl DoubleEndedIterator for $reverse_iterator
667 /// where P::Searcher: DoubleEndedSearcher
668 /// { /* internal ends up calling Searcher::next_match() */ }
670 /// The internal one is defined outside the macro, and has almost the same
671 /// semantics as a `DoubleEndedIterator` by delegating to `pattern::Searcher` and
672 /// `pattern::ReverseSearcher` for both forward and reverse iteration.
674 /// "Almost", because a `Searcher` and a `ReverseSearcher` for a given
675 /// `Pattern` might not return the same elements, so actually implementing
676 /// `DoubleEndedIterator` for it would be incorrect.
677 /// (See the docs in `str::pattern` for more details)
679 /// However, the internal struct still represents a single ended iterator from
680 /// either end, and depending on pattern is also a valid double ended iterator,
681 /// so the two wrapper structs implement `Iterator`
682 /// and `DoubleEndedIterator` depending on the concrete pattern type, leading
683 /// to the complex impls seen above.
684 macro_rules! generate_pattern_iterators {
688 $(#[$forward_iterator_attribute:meta])*
689 struct $forward_iterator:ident;
693 $(#[$reverse_iterator_attribute:meta])*
694 struct $reverse_iterator:ident;
696 // Stability of all generated items
698 $(#[$common_stability_attribute:meta])*
700 // Internal almost-iterator that is being delegated to
702 $internal_iterator:ident yielding ($iterty:ty);
704 // Kind of delegation - either single ended or double ended
707 $(#[$forward_iterator_attribute])*
708 $(#[$common_stability_attribute])*
709 pub struct $forward_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);
711 $(#[$common_stability_attribute])*
712 impl<'a, P: Pattern<'a>> fmt::Debug for $forward_iterator<'a, P>
713 where P::Searcher: fmt::Debug
715 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
716 f.debug_tuple(stringify!($forward_iterator))
722 $(#[$common_stability_attribute])*
723 impl<'a, P: Pattern<'a>> Iterator for $forward_iterator<'a, P> {
727 fn next(&mut self) -> Option<$iterty> {
732 $(#[$common_stability_attribute])*
733 impl<'a, P: Pattern<'a>> Clone for $forward_iterator<'a, P>
734 where P::Searcher: Clone
736 fn clone(&self) -> Self {
737 $forward_iterator(self.0.clone())
741 $(#[$reverse_iterator_attribute])*
742 $(#[$common_stability_attribute])*
743 pub struct $reverse_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);
745 $(#[$common_stability_attribute])*
746 impl<'a, P: Pattern<'a>> fmt::Debug for $reverse_iterator<'a, P>
747 where P::Searcher: fmt::Debug
749 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
750 f.debug_tuple(stringify!($reverse_iterator))
756 $(#[$common_stability_attribute])*
757 impl<'a, P: Pattern<'a>> Iterator for $reverse_iterator<'a, P>
758 where P::Searcher: ReverseSearcher<'a>
763 fn next(&mut self) -> Option<$iterty> {
768 $(#[$common_stability_attribute])*
769 impl<'a, P: Pattern<'a>> Clone for $reverse_iterator<'a, P>
770 where P::Searcher: Clone
772 fn clone(&self) -> Self {
773 $reverse_iterator(self.0.clone())
777 #[unstable(feature = "fused", issue = "35602")]
778 impl<'a, P: Pattern<'a>> FusedIterator for $forward_iterator<'a, P> {}
780 #[unstable(feature = "fused", issue = "35602")]
781 impl<'a, P: Pattern<'a>> FusedIterator for $reverse_iterator<'a, P>
782 where P::Searcher: ReverseSearcher<'a> {}
784 generate_pattern_iterators!($($t)* with $(#[$common_stability_attribute])*,
786 $reverse_iterator, $iterty);
789 double ended; with $(#[$common_stability_attribute:meta])*,
790 $forward_iterator:ident,
791 $reverse_iterator:ident, $iterty:ty
793 $(#[$common_stability_attribute])*
794 impl<'a, P: Pattern<'a>> DoubleEndedIterator for $forward_iterator<'a, P>
795 where P::Searcher: DoubleEndedSearcher<'a>
798 fn next_back(&mut self) -> Option<$iterty> {
803 $(#[$common_stability_attribute])*
804 impl<'a, P: Pattern<'a>> DoubleEndedIterator for $reverse_iterator<'a, P>
805 where P::Searcher: DoubleEndedSearcher<'a>
808 fn next_back(&mut self) -> Option<$iterty> {
814 single ended; with $(#[$common_stability_attribute:meta])*,
815 $forward_iterator:ident,
816 $reverse_iterator:ident, $iterty:ty
820 derive_pattern_clone!{
822 with |s| SplitInternal { matcher: s.matcher.clone(), ..*s }
825 struct SplitInternal<'a, P: Pattern<'a>> {
828 matcher: P::Searcher,
829 allow_trailing_empty: bool,
833 impl<'a, P: Pattern<'a>> fmt::Debug for SplitInternal<'a, P> where P::Searcher: fmt::Debug {
834 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
835 f.debug_struct("SplitInternal")
836 .field("start", &self.start)
837 .field("end", &self.end)
838 .field("matcher", &self.matcher)
839 .field("allow_trailing_empty", &self.allow_trailing_empty)
840 .field("finished", &self.finished)
845 impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
847 fn get_end(&mut self) -> Option<&'a str> {
848 if !self.finished && (self.allow_trailing_empty || self.end - self.start > 0) {
849 self.finished = true;
851 let string = self.matcher.haystack().slice_unchecked(self.start, self.end);
860 fn next(&mut self) -> Option<&'a str> {
861 if self.finished { return None }
863 let haystack = self.matcher.haystack();
864 match self.matcher.next_match() {
865 Some((a, b)) => unsafe {
866 let elt = haystack.slice_unchecked(self.start, a);
870 None => self.get_end(),
875 fn next_back(&mut self) -> Option<&'a str>
876 where P::Searcher: ReverseSearcher<'a>
878 if self.finished { return None }
880 if !self.allow_trailing_empty {
881 self.allow_trailing_empty = true;
882 match self.next_back() {
883 Some(elt) if !elt.is_empty() => return Some(elt),
884 _ => if self.finished { return None }
888 let haystack = self.matcher.haystack();
889 match self.matcher.next_match_back() {
890 Some((a, b)) => unsafe {
891 let elt = haystack.slice_unchecked(b, self.end);
896 self.finished = true;
897 Some(haystack.slice_unchecked(self.start, self.end))
903 generate_pattern_iterators! {
905 /// Created with the method [`split()`].
907 /// [`split()`]: ../../std/primitive.str.html#method.split
910 /// Created with the method [`rsplit()`].
912 /// [`rsplit()`]: ../../std/primitive.str.html#method.rsplit
915 #[stable(feature = "rust1", since = "1.0.0")]
917 SplitInternal yielding (&'a str);
918 delegate double ended;
921 generate_pattern_iterators! {
923 /// Created with the method [`split_terminator()`].
925 /// [`split_terminator()`]: ../../std/primitive.str.html#method.split_terminator
926 struct SplitTerminator;
928 /// Created with the method [`rsplit_terminator()`].
930 /// [`rsplit_terminator()`]: ../../std/primitive.str.html#method.rsplit_terminator
931 struct RSplitTerminator;
933 #[stable(feature = "rust1", since = "1.0.0")]
935 SplitInternal yielding (&'a str);
936 delegate double ended;
939 derive_pattern_clone!{
941 with |s| SplitNInternal { iter: s.iter.clone(), ..*s }
944 struct SplitNInternal<'a, P: Pattern<'a>> {
945 iter: SplitInternal<'a, P>,
946 /// The number of splits remaining
950 impl<'a, P: Pattern<'a>> fmt::Debug for SplitNInternal<'a, P> where P::Searcher: fmt::Debug {
951 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
952 f.debug_struct("SplitNInternal")
953 .field("iter", &self.iter)
954 .field("count", &self.count)
959 impl<'a, P: Pattern<'a>> SplitNInternal<'a, P> {
961 fn next(&mut self) -> Option<&'a str> {
964 1 => { self.count = 0; self.iter.get_end() }
965 _ => { self.count -= 1; self.iter.next() }
970 fn next_back(&mut self) -> Option<&'a str>
971 where P::Searcher: ReverseSearcher<'a>
975 1 => { self.count = 0; self.iter.get_end() }
976 _ => { self.count -= 1; self.iter.next_back() }
981 generate_pattern_iterators! {
983 /// Created with the method [`splitn()`].
985 /// [`splitn()`]: ../../std/primitive.str.html#method.splitn
988 /// Created with the method [`rsplitn()`].
990 /// [`rsplitn()`]: ../../std/primitive.str.html#method.rsplitn
993 #[stable(feature = "rust1", since = "1.0.0")]
995 SplitNInternal yielding (&'a str);
996 delegate single ended;
999 derive_pattern_clone!{
1000 clone MatchIndicesInternal
1001 with |s| MatchIndicesInternal(s.0.clone())
1004 struct MatchIndicesInternal<'a, P: Pattern<'a>>(P::Searcher);
1006 impl<'a, P: Pattern<'a>> fmt::Debug for MatchIndicesInternal<'a, P> where P::Searcher: fmt::Debug {
1007 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1008 f.debug_tuple("MatchIndicesInternal")
1014 impl<'a, P: Pattern<'a>> MatchIndicesInternal<'a, P> {
1016 fn next(&mut self) -> Option<(usize, &'a str)> {
1017 self.0.next_match().map(|(start, end)| unsafe {
1018 (start, self.0.haystack().slice_unchecked(start, end))
1023 fn next_back(&mut self) -> Option<(usize, &'a str)>
1024 where P::Searcher: ReverseSearcher<'a>
1026 self.0.next_match_back().map(|(start, end)| unsafe {
1027 (start, self.0.haystack().slice_unchecked(start, end))
1032 generate_pattern_iterators! {
1034 /// Created with the method [`match_indices()`].
1036 /// [`match_indices()`]: ../../std/primitive.str.html#method.match_indices
1037 struct MatchIndices;
1039 /// Created with the method [`rmatch_indices()`].
1041 /// [`rmatch_indices()`]: ../../std/primitive.str.html#method.rmatch_indices
1042 struct RMatchIndices;
1044 #[stable(feature = "str_match_indices", since = "1.5.0")]
1046 MatchIndicesInternal yielding ((usize, &'a str));
1047 delegate double ended;
1050 derive_pattern_clone!{
1051 clone MatchesInternal
1052 with |s| MatchesInternal(s.0.clone())
1055 struct MatchesInternal<'a, P: Pattern<'a>>(P::Searcher);
1057 impl<'a, P: Pattern<'a>> fmt::Debug for MatchesInternal<'a, P> where P::Searcher: fmt::Debug {
1058 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1059 f.debug_tuple("MatchesInternal")
1065 impl<'a, P: Pattern<'a>> MatchesInternal<'a, P> {
1067 fn next(&mut self) -> Option<&'a str> {
1068 self.0.next_match().map(|(a, b)| unsafe {
1069 // Indices are known to be on utf8 boundaries
1070 self.0.haystack().slice_unchecked(a, b)
1075 fn next_back(&mut self) -> Option<&'a str>
1076 where P::Searcher: ReverseSearcher<'a>
1078 self.0.next_match_back().map(|(a, b)| unsafe {
1079 // Indices are known to be on utf8 boundaries
1080 self.0.haystack().slice_unchecked(a, b)
1085 generate_pattern_iterators! {
1087 /// Created with the method [`matches()`].
1089 /// [`matches()`]: ../../std/primitive.str.html#method.matches
1092 /// Created with the method [`rmatches()`].
1094 /// [`rmatches()`]: ../../std/primitive.str.html#method.rmatches
1097 #[stable(feature = "str_matches", since = "1.2.0")]
1099 MatchesInternal yielding (&'a str);
1100 delegate double ended;
1103 /// Created with the method [`lines()`].
1105 /// [`lines()`]: ../../std/primitive.str.html#method.lines
1106 #[stable(feature = "rust1", since = "1.0.0")]
1107 #[derive(Clone, Debug)]
1108 pub struct Lines<'a>(Map<SplitTerminator<'a, char>, LinesAnyMap>);
1110 #[stable(feature = "rust1", since = "1.0.0")]
1111 impl<'a> Iterator for Lines<'a> {
1112 type Item = &'a str;
1115 fn next(&mut self) -> Option<&'a str> {
1120 fn size_hint(&self) -> (usize, Option<usize>) {
1125 #[stable(feature = "rust1", since = "1.0.0")]
1126 impl<'a> DoubleEndedIterator for Lines<'a> {
1128 fn next_back(&mut self) -> Option<&'a str> {
1133 #[unstable(feature = "fused", issue = "35602")]
1134 impl<'a> FusedIterator for Lines<'a> {}
1136 /// Created with the method [`lines_any()`].
1138 /// [`lines_any()`]: ../../std/primitive.str.html#method.lines_any
1139 #[stable(feature = "rust1", since = "1.0.0")]
1140 #[rustc_deprecated(since = "1.4.0", reason = "use lines()/Lines instead now")]
1141 #[derive(Clone, Debug)]
1142 #[allow(deprecated)]
1143 pub struct LinesAny<'a>(Lines<'a>);
1145 /// A nameable, cloneable fn type
1149 impl<'a> Fn<(&'a str,)> for LinesAnyMap {
1151 extern "rust-call" fn call(&self, (line,): (&'a str,)) -> &'a str {
1153 if l > 0 && line.as_bytes()[l - 1] == b'\r' { &line[0 .. l - 1] }
1158 impl<'a> FnMut<(&'a str,)> for LinesAnyMap {
1160 extern "rust-call" fn call_mut(&mut self, (line,): (&'a str,)) -> &'a str {
1161 Fn::call(&*self, (line,))
1165 impl<'a> FnOnce<(&'a str,)> for LinesAnyMap {
1166 type Output = &'a str;
1169 extern "rust-call" fn call_once(self, (line,): (&'a str,)) -> &'a str {
1170 Fn::call(&self, (line,))
1174 #[stable(feature = "rust1", since = "1.0.0")]
1175 #[allow(deprecated)]
1176 impl<'a> Iterator for LinesAny<'a> {
1177 type Item = &'a str;
1180 fn next(&mut self) -> Option<&'a str> {
1185 fn size_hint(&self) -> (usize, Option<usize>) {
1190 #[stable(feature = "rust1", since = "1.0.0")]
1191 #[allow(deprecated)]
1192 impl<'a> DoubleEndedIterator for LinesAny<'a> {
1194 fn next_back(&mut self) -> Option<&'a str> {
1199 #[unstable(feature = "fused", issue = "35602")]
1200 #[allow(deprecated)]
1201 impl<'a> FusedIterator for LinesAny<'a> {}
1204 Section: Comparing strings
/// Compares two string slices byte for byte.
/// NOTE: This function is (ab)used in rustc::middle::trans::_match
/// to compare &[u8] byte slices that are not necessarily valid UTF-8.
fn eq_slice(a: &str, b: &str) -> bool {
    let (lhs, rhs) = (a.as_bytes(), b.as_bytes());
    lhs == rhs
}
1217 Section: UTF-8 validation
// A mask with the top bit of every byte set. It is written as a `u64` and
// truncated with `as` so that it also fits a narrower `usize`.
const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;

/// Reports whether at least one byte of the word `x` is nonascii (>= 128),
/// i.e. has its top bit set.
fn contains_nonascii(x: usize) -> bool {
    let high_bits = x & NONASCII_MASK;
    high_bits != 0
}
1229 /// Walks through `v` checking that it's a valid UTF-8 sequence,
1230 /// returning `Ok(())` in that case, or, if it is invalid, an `Err`
1231 /// holding a `Utf8Error` whose `valid_up_to` is the index up to which
1232 /// `v` was verified as valid.
1234 fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
1238 let usize_bytes = mem::size_of::<usize>();
1239 let ascii_block_size = 2 * usize_bytes;
1240 let blocks_end = if len >= ascii_block_size { len - ascii_block_size + 1 } else { 0 };
1243 let old_offset = index;
1244 macro_rules! err { () => {{
1245 return Err(Utf8Error {
1246 valid_up_to: old_offset
1250 macro_rules! next { () => {{
1252 // we needed data, but there was none: error!
1259 let first = v[index];
1261 let w = UTF8_CHAR_WIDTH[first as usize];
1262 let second = next!();
1263 // 2-byte encoding is for codepoints \u{0080} to \u{07ff}
1264 // first C2 80 last DF BF
1265 // 3-byte encoding is for codepoints \u{0800} to \u{ffff}
1266 // first E0 A0 80 last EF BF BF
1267 // excluding surrogates codepoints \u{d800} to \u{dfff}
1268 // ED A0 80 to ED BF BF
1269 // 4-byte encoding is for codepoints \u{10000} to \u{10ffff}
1270 // first F0 90 80 80 last F4 8F BF BF
1272 // Use the UTF-8 syntax from the RFC
1274 // https://tools.ietf.org/html/rfc3629
1276 // UTF8-2 = %xC2-DF UTF8-tail
1277 // UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
1278 // %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
1279 // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
1280 // %xF4 %x80-8F 2( UTF8-tail )
1282 2 => if second & !CONT_MASK != TAG_CONT_U8 {err!()},
1284 match (first, second, next!() & !CONT_MASK) {
1285 (0xE0 , 0xA0 ... 0xBF, TAG_CONT_U8) |
1286 (0xE1 ... 0xEC, 0x80 ... 0xBF, TAG_CONT_U8) |
1287 (0xED , 0x80 ... 0x9F, TAG_CONT_U8) |
1288 (0xEE ... 0xEF, 0x80 ... 0xBF, TAG_CONT_U8) => {}
1293 match (first, second, next!() & !CONT_MASK, next!() & !CONT_MASK) {
1294 (0xF0 , 0x90 ... 0xBF, TAG_CONT_U8, TAG_CONT_U8) |
1295 (0xF1 ... 0xF3, 0x80 ... 0xBF, TAG_CONT_U8, TAG_CONT_U8) |
1296 (0xF4 , 0x80 ... 0x8F, TAG_CONT_U8, TAG_CONT_U8) => {}
1304 // Ascii case, try to skip forward quickly.
1305 // When the pointer is aligned, read 2 words of data per iteration
1306 // until we find a word containing a non-ascii byte.
1307 let ptr = v.as_ptr();
1308 let align = (ptr as usize + index) & (usize_bytes - 1);
1310 while index < blocks_end {
1312 let block = ptr.offset(index as isize) as *const usize;
1313 // break if there is a nonascii byte
1314 let zu = contains_nonascii(*block);
1315 let zv = contains_nonascii(*block.offset(1));
1320 index += ascii_block_size;
1322 // step from the point where the wordwise loop stopped
1323 while index < len && v[index] < 128 {
1335 // https://tools.ietf.org/html/rfc3629
// Lookup table indexed by the first byte of a sequence:
//   0x00-0x7F => 1, 0xC2-0xDF => 2, 0xE0-0xEF => 3, 0xF0-0xF4 => 4,
// and 0 for every other value (0x80-0xC1 and 0xF5-0xFF).
// The trailing comments give the index of the last entry of their row.
1336 static UTF8_CHAR_WIDTH: [u8; 256] = [
1337 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1338 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
1339 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1340 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
1341 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1342 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
1343 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1344 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
1345 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1346 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
1347 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1348 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
1349 0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
1350 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
1351 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
1352 4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
1355 /// Given a first byte, determine how many bytes are in this UTF-8 character
1356 #[unstable(feature = "str_internals", issue = "0")]
1358 pub fn utf8_char_width(b: u8) -> usize {
1359 return UTF8_CHAR_WIDTH[b as usize] as usize;
/// Mask of the value bits of a continuation byte (`0b0011_1111`).
const CONT_MASK: u8 = 0x3F;
/// Value of the tag bits of a continuation byte (`0b1000_0000`); the tag
/// mask is `!CONT_MASK`.
const TAG_CONT_U8: u8 = 0x80;
1368 Section: Trait implementations
1376 /// Implements ordering of strings.
1378 /// Strings are ordered lexicographically by their byte values. This orders Unicode code
1379 /// points based on their positions in the code charts. This is not necessarily the same as
1380 /// "alphabetical" order, which varies by language and locale. Sorting strings according to
1381 /// culturally-accepted standards requires locale-specific data that is outside the scope of
1383 #[stable(feature = "rust1", since = "1.0.0")]
1386 fn cmp(&self, other: &str) -> Ordering {
1387 self.as_bytes().cmp(other.as_bytes())
1391 #[stable(feature = "rust1", since = "1.0.0")]
1392 impl PartialEq for str {
1394 fn eq(&self, other: &str) -> bool {
1395 eq_slice(self, other)
1398 fn ne(&self, other: &str) -> bool { !(*self).eq(other) }
1401 #[stable(feature = "rust1", since = "1.0.0")]
1404 /// Implements comparison operations on strings.
1406 /// Strings are compared lexicographically by their byte values. This compares Unicode code
1407 /// points based on their positions in the code charts. This is not necessarily the same as
1408 /// "alphabetical" order, which varies by language and locale. Comparing strings according to
1409 /// culturally-accepted standards requires locale-specific data that is outside the scope of
1411 #[stable(feature = "rust1", since = "1.0.0")]
1412 impl PartialOrd for str {
1414 fn partial_cmp(&self, other: &str) -> Option<Ordering> {
1415 Some(self.cmp(other))
1419 /// Implements substring slicing with syntax `&self[begin .. end]`.
1421 /// Returns a slice of the given string from the byte range
1422 /// [`begin`..`end`).
1424 /// This operation is `O(1)`.
1428 /// Panics if `begin` or `end` does not point to the starting
1429 /// byte offset of a character (as defined by `is_char_boundary`).
1430 /// Requires that `begin <= end` and `end <= len` where `len` is the
1431 /// length of the string.
1436 /// let s = "Löwe 老虎 Léopard";
1437 /// assert_eq!(&s[0 .. 1], "L");
1439 /// assert_eq!(&s[1 .. 9], "öwe 老");
1441 /// // these will panic:
1442 /// // byte 2 lies within `ö`:
1445 /// // byte 8 lies within `老`
1448 /// // byte 100 is outside the string
1449 /// // &s[3 .. 100];
1451 #[stable(feature = "rust1", since = "1.0.0")]
1452 impl ops::Index<ops::Range<usize>> for str {
1455 fn index(&self, index: ops::Range<usize>) -> &str {
1456 // is_char_boundary checks that the index is in [0, .len()]
1457 if index.start <= index.end &&
1458 self.is_char_boundary(index.start) &&
1459 self.is_char_boundary(index.end) {
1460 unsafe { self.slice_unchecked(index.start, index.end) }
1462 super::slice_error_fail(self, index.start, index.end)
1467 /// Implements mutable substring slicing with syntax
1468 /// `&mut self[begin .. end]`.
1470 /// Returns a mutable slice of the given string from the byte range
1471 /// [`begin`..`end`).
1473 /// This operation is `O(1)`.
1477 /// Panics if `begin` or `end` does not point to the starting
1478 /// byte offset of a character (as defined by `is_char_boundary`).
1479 /// Requires that `begin <= end` and `end <= len` where `len` is the
1480 /// length of the string.
1481 #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1482 impl ops::IndexMut<ops::Range<usize>> for str {
1484 fn index_mut(&mut self, index: ops::Range<usize>) -> &mut str {
1485 // is_char_boundary checks that the index is in [0, .len()]
1486 if index.start <= index.end &&
1487 self.is_char_boundary(index.start) &&
1488 self.is_char_boundary(index.end) {
1489 unsafe { self.slice_mut_unchecked(index.start, index.end) }
1491 super::slice_error_fail(self, index.start, index.end)
1496 /// Implements substring slicing with syntax `&self[.. end]`.
1498 /// Returns a slice of the string from the beginning to byte offset
1501 /// Equivalent to `&self[0 .. end]`.
1502 #[stable(feature = "rust1", since = "1.0.0")]
1503 impl ops::Index<ops::RangeTo<usize>> for str {
1507 fn index(&self, index: ops::RangeTo<usize>) -> &str {
1508 // is_char_boundary checks that the index is in [0, .len()]
1509 if self.is_char_boundary(index.end) {
1510 unsafe { self.slice_unchecked(0, index.end) }
1512 super::slice_error_fail(self, 0, index.end)
1517 /// Implements mutable substring slicing with syntax `&mut self[.. end]`.
1519 /// Returns a mutable slice of the string from the beginning to byte offset
1522 /// Equivalent to `&mut self[0 .. end]`.
1523 #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1524 impl ops::IndexMut<ops::RangeTo<usize>> for str {
1526 fn index_mut(&mut self, index: ops::RangeTo<usize>) -> &mut str {
1527 // is_char_boundary checks that the index is in [0, .len()]
1528 if self.is_char_boundary(index.end) {
1529 unsafe { self.slice_mut_unchecked(0, index.end) }
1531 super::slice_error_fail(self, 0, index.end)
1536 /// Implements substring slicing with syntax `&self[begin ..]`.
1538 /// Returns a slice of the string from byte offset `begin`
1539 /// to the end of the string.
1541 /// Equivalent to `&self[begin .. len]`.
1542 #[stable(feature = "rust1", since = "1.0.0")]
1543 impl ops::Index<ops::RangeFrom<usize>> for str {
1547 fn index(&self, index: ops::RangeFrom<usize>) -> &str {
1548 // is_char_boundary checks that the index is in [0, .len()]
1549 if self.is_char_boundary(index.start) {
1550 unsafe { self.slice_unchecked(index.start, self.len()) }
1552 super::slice_error_fail(self, index.start, self.len())
1557 /// Implements mutable substring slicing with syntax `&mut self[begin ..]`.
1559 /// Returns a mutable slice of the string from byte offset `begin`
1560 /// to the end of the string.
1562 /// Equivalent to `&mut self[begin .. len]`.
1563 #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1564 impl ops::IndexMut<ops::RangeFrom<usize>> for str {
1566 fn index_mut(&mut self, index: ops::RangeFrom<usize>) -> &mut str {
1567 // is_char_boundary checks that the index is in [0, .len()]
1568 if self.is_char_boundary(index.start) {
1569 let len = self.len();
1570 unsafe { self.slice_mut_unchecked(index.start, len) }
1572 super::slice_error_fail(self, index.start, self.len())
1577 /// Implements substring slicing with syntax `&self[..]`.
1579 /// Returns a slice of the whole string. This operation can
1582 /// Equivalent to `&self[0 .. len]`.
1583 #[stable(feature = "rust1", since = "1.0.0")]
1584 impl ops::Index<ops::RangeFull> for str {
1588 fn index(&self, _index: ops::RangeFull) -> &str {
1593 /// Implements mutable substring slicing with syntax `&mut self[..]`.
1595 /// Returns a mutable slice of the whole string. This operation can
1598 /// Equivalent to `&mut self[0 .. len]`.
1599 #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1600 impl ops::IndexMut<ops::RangeFull> for str {
1602 fn index_mut(&mut self, _index: ops::RangeFull) -> &mut str {
1607 #[unstable(feature = "inclusive_range",
1608 reason = "recently added, follows RFC",
1610 impl ops::Index<ops::RangeInclusive<usize>> for str {
1614 fn index(&self, index: ops::RangeInclusive<usize>) -> &str {
1616 ops::RangeInclusive::Empty { .. } => "",
1617 ops::RangeInclusive::NonEmpty { end, .. } if end == usize::max_value() =>
1618 panic!("attempted to index slice up to maximum usize"),
1619 ops::RangeInclusive::NonEmpty { start, end } =>
1620 self.index(start .. end+1)
1624 #[unstable(feature = "inclusive_range",
1625 reason = "recently added, follows RFC",
1627 impl ops::Index<ops::RangeToInclusive<usize>> for str {
1631 fn index(&self, index: ops::RangeToInclusive<usize>) -> &str {
1632 self.index(0...index.end)
1636 #[unstable(feature = "inclusive_range",
1637 reason = "recently added, follows RFC",
1639 impl ops::IndexMut<ops::RangeInclusive<usize>> for str {
1641 fn index_mut(&mut self, index: ops::RangeInclusive<usize>) -> &mut str {
1643 ops::RangeInclusive::Empty { .. } => &mut self[0..0], // `&mut ""` doesn't work
1644 ops::RangeInclusive::NonEmpty { end, .. } if end == usize::max_value() =>
1645 panic!("attempted to index str up to maximum usize"),
1646 ops::RangeInclusive::NonEmpty { start, end } =>
1647 self.index_mut(start .. end+1)
1651 #[unstable(feature = "inclusive_range",
1652 reason = "recently added, follows RFC",
1654 impl ops::IndexMut<ops::RangeToInclusive<usize>> for str {
1656 fn index_mut(&mut self, index: ops::RangeToInclusive<usize>) -> &mut str {
1657 self.index_mut(0...index.end)
1662 /// Methods for string slices
1663 #[allow(missing_docs)]
1665 #[unstable(feature = "core_str_ext",
1666 reason = "stable interface provided by `impl str` in later crates",
1669 // NB there are no docs here as they're all located on the StrExt trait in
1670 // libcollections, not here.
1672 #[stable(feature = "core", since = "1.6.0")]
1673 fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
1674 #[stable(feature = "core", since = "1.6.0")]
1675 fn chars(&self) -> Chars;
1676 #[stable(feature = "core", since = "1.6.0")]
1677 fn bytes(&self) -> Bytes;
1678 #[stable(feature = "core", since = "1.6.0")]
1679 fn char_indices(&self) -> CharIndices;
1680 #[stable(feature = "core", since = "1.6.0")]
1681 fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P>;
1682 #[stable(feature = "core", since = "1.6.0")]
1683 fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P>
1684 where P::Searcher: ReverseSearcher<'a>;
1685 #[stable(feature = "core", since = "1.6.0")]
1686 fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P>;
1687 #[stable(feature = "core", since = "1.6.0")]
1688 fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>
1689 where P::Searcher: ReverseSearcher<'a>;
1690 #[stable(feature = "core", since = "1.6.0")]
1691 fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P>;
1692 #[stable(feature = "core", since = "1.6.0")]
1693 fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P>
1694 where P::Searcher: ReverseSearcher<'a>;
1695 #[stable(feature = "core", since = "1.6.0")]
1696 fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P>;
1697 #[stable(feature = "core", since = "1.6.0")]
1698 fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P>
1699 where P::Searcher: ReverseSearcher<'a>;
1700 #[stable(feature = "core", since = "1.6.0")]
1701 fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P>;
1702 #[stable(feature = "core", since = "1.6.0")]
1703 fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P>
1704 where P::Searcher: ReverseSearcher<'a>;
1705 #[stable(feature = "core", since = "1.6.0")]
1706 fn lines(&self) -> Lines;
1707 #[stable(feature = "core", since = "1.6.0")]
1708 #[rustc_deprecated(since = "1.6.0", reason = "use lines() instead now")]
1709 #[allow(deprecated)]
1710 fn lines_any(&self) -> LinesAny;
1711 #[stable(feature = "core", since = "1.6.0")]
1712 unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str;
1713 #[stable(feature = "core", since = "1.6.0")]
1714 unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str;
1715 #[stable(feature = "core", since = "1.6.0")]
1716 fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
1717 #[stable(feature = "core", since = "1.6.0")]
1718 fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool
1719 where P::Searcher: ReverseSearcher<'a>;
1720 #[stable(feature = "core", since = "1.6.0")]
1721 fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
1722 where P::Searcher: DoubleEndedSearcher<'a>;
1723 #[stable(feature = "core", since = "1.6.0")]
1724 fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str;
1725 #[stable(feature = "core", since = "1.6.0")]
1726 fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
1727 where P::Searcher: ReverseSearcher<'a>;
1728 #[stable(feature = "is_char_boundary", since = "1.9.0")]
1729 fn is_char_boundary(&self, index: usize) -> bool;
1730 #[stable(feature = "core", since = "1.6.0")]
1731 fn as_bytes(&self) -> &[u8];
1732 #[stable(feature = "core", since = "1.6.0")]
1733 fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>;
1734 #[stable(feature = "core", since = "1.6.0")]
1735 fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>
1736 where P::Searcher: ReverseSearcher<'a>;
1737 fn find_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>;
1738 #[stable(feature = "core", since = "1.6.0")]
1739 fn split_at(&self, mid: usize) -> (&str, &str);
1740 #[stable(feature = "core", since = "1.6.0")]
1741 fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str);
1742 #[stable(feature = "core", since = "1.6.0")]
1743 fn as_ptr(&self) -> *const u8;
1744 #[stable(feature = "core", since = "1.6.0")]
1745 fn len(&self) -> usize;
1746 #[stable(feature = "core", since = "1.6.0")]
1747 fn is_empty(&self) -> bool;
1748 #[stable(feature = "core", since = "1.6.0")]
1749 fn parse<T: FromStr>(&self) -> Result<T, T::Err>;
1752 // Truncates `s` to a length of at most `max` bytes.
1753 // Returns `true` if it was truncated, together with the new str.
1754 fn truncate_to_char_boundary(s: &str, mut max: usize) -> (bool, &str) {
// Adjust `max` until it lands on a char boundary, so the cut never falls
// inside a multi-byte character.
1758 while !s.is_char_boundary(max) {
// Panics with a message explaining why slicing `s` with `begin..end` is
// invalid; never returns. The string shown in the message is cut down to at
// most `MAX_DISPLAY_LENGTH` bytes and followed by `[...]` when it was cut.
1767 fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
1768 const MAX_DISPLAY_LENGTH: usize = 256;
1769 let (truncated, s_trunc) = truncate_to_char_boundary(s, MAX_DISPLAY_LENGTH);
1770 let ellipsis = if truncated { "[...]" } else { "" };
// 1. out of bounds: report `begin` if it is the offender, otherwise `end`
1773 if begin > s.len() || end > s.len() {
1774 let oob_index = if begin > s.len() { begin } else { end };
1775 panic!("byte index {} is out of bounds of `{}`{}", oob_index, s_trunc, ellipsis);
// 2. reversed range
1779 assert!(begin <= end, "begin <= end ({} <= {}) when slicing `{}`{}",
1780 begin, end, s_trunc, ellipsis);
1782 // 3. character boundary
// Blame `begin` when it is off a boundary, otherwise `end`.
1783 let index = if !s.is_char_boundary(begin) { begin } else { end };
1784 // find the character
1785 let mut char_start = index;
1786 while !s.is_char_boundary(char_start) {
1789 // `char_start` must be less than len and a char boundary
1790 let ch = s[char_start..].chars().next().unwrap();
// Byte range occupied by the character that `index` falls inside.
1791 let char_range = char_start .. char_start + ch.len_utf8();
1792 panic!("byte index {} is not a char boundary; it is inside {:?} (bytes {:?}) of `{}`{}",
1793 index, ch, char_range, s_trunc, ellipsis);
1796 #[stable(feature = "core", since = "1.6.0")]
1797 impl StrExt for str {
1799 fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
1800 pat.is_contained_in(self)
1804 fn chars(&self) -> Chars {
1805 Chars{iter: self.as_bytes().iter()}
1809 fn bytes(&self) -> Bytes {
1810 Bytes(self.as_bytes().iter().cloned())
1814 fn char_indices(&self) -> CharIndices {
1815 CharIndices { front_offset: 0, iter: self.chars() }
// Builds the forward `Split` iterator: a `SplitInternal` driven by the
// searcher that `pat` creates over `self`.
1819 fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> {
1820 Split(SplitInternal {
1823 matcher: pat.into_searcher(self),
// NOTE(review): presumably lets a trailing empty piece be yielded when the
// input ends with a match -- confirm against `SplitInternal`.
1824 allow_trailing_empty: true,
1830 fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P>
1831 where P::Searcher: ReverseSearcher<'a>
1833 RSplit(self.split(pat).0)
// Builds `SplitN` from a `SplitNInternal`.
1837 fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> {
1838 SplitN(SplitNInternal {
// Reuse the `SplitInternal` that `split` constructs for the same pattern.
1839 iter: self.split(pat).0,
1845 fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>
1846 where P::Searcher: ReverseSearcher<'a>
1848 RSplitN(self.splitn(count, pat).0)
// Builds `SplitTerminator` from a `SplitInternal` whose
// `allow_trailing_empty` flag is off (`split` sets it to `true`).
1852 fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> {
1853 SplitTerminator(SplitInternal {
1854 allow_trailing_empty: false,
1860 fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P>
1861 where P::Searcher: ReverseSearcher<'a>
1863 RSplitTerminator(self.split_terminator(pat).0)
1867 fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> {
1868 Matches(MatchesInternal(pat.into_searcher(self)))
1872 fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P>
1873 where P::Searcher: ReverseSearcher<'a>
1875 RMatches(self.matches(pat).0)
1879 fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> {
1880 MatchIndices(MatchIndicesInternal(pat.into_searcher(self)))
1884 fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P>
1885 where P::Searcher: ReverseSearcher<'a>
1887 RMatchIndices(self.match_indices(pat).0)
1890 fn lines(&self) -> Lines {
1891 Lines(self.split_terminator('\n').map(LinesAnyMap))
1895 #[allow(deprecated)]
1896 fn lines_any(&self) -> LinesAny {
1897 LinesAny(self.lines())
1901 unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str {
1902 let ptr = self.as_ptr().offset(begin as isize);
1903 let len = end - begin;
1904 from_utf8_unchecked(slice::from_raw_parts(ptr, len))
1908 unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str {
1909 let ptr = self.as_ptr().offset(begin as isize);
1910 let len = end - begin;
1911 mem::transmute(slice::from_raw_parts_mut(ptr as *mut u8, len))
1915 fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
1916 pat.is_prefix_of(self)
1920 fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool
1921 where P::Searcher: ReverseSearcher<'a>
1923 pat.is_suffix_of(self)
// Returns the part of `self` between the first non-matching span found from
// the front and the last one found from the back, using one double-ended
// searcher for `pat`.
1927 fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
1928 where P::Searcher: DoubleEndedSearcher<'a>
1932 let mut matcher = pat.into_searcher(self);
// Scan from the front for the first span that does not match `pat`.
1933 if let Some((a, b)) = matcher.next_reject() {
1935 j = b; // Remember earliest known match, correct it below if
1936 // last match is different
// Scan from the back for the last span that does not match `pat`.
1938 if let Some((_, b)) = matcher.next_reject_back() {
1942 // Searcher is known to return valid indices
1943 self.slice_unchecked(i, j)
// Returns the tail of `self` starting at offset `i`.
1948 fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
// Default start is the end of the string, i.e. an empty result.
1949 let mut i = self.len();
1950 let mut matcher = pat.into_searcher(self);
// Scan from the front for the first span that does not match `pat`.
1951 if let Some((a, _)) = matcher.next_reject() {
1955 // Searcher is known to return valid indices
1956 self.slice_unchecked(i, self.len())
// Returns the head of `self` ending at offset `j`; needs a searcher that can
// run in reverse.
1961 fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
1962 where P::Searcher: ReverseSearcher<'a>
1965 let mut matcher = pat.into_searcher(self);
// Scan from the back for the last span that does not match `pat`.
1966 if let Some((_, b)) = matcher.next_reject_back() {
1970 // Searcher is known to return valid indices
1971 self.slice_unchecked(0, j)
// Reports whether `index` is a valid place to cut the string: offset 0,
// offset `len`, or the offset of a byte that is not a UTF-8 continuation
// byte.
1976 fn is_char_boundary(&self, index: usize) -> bool {
1977 // 0 and len are always ok.
1978 // Test for 0 explicitly so that it can optimize out the check
1979 // easily and skip reading string data for that case.
1980 if index == 0 || index == self.len() { return true; }
1981 match self.as_bytes().get(index) {
1983 // This is bit magic equivalent to: b < 128 || b >= 192
// (as `i8`, the continuation bytes 0x80..=0xBF are exactly the values
// below -0x40).
1984 Some(&b) => (b as i8) >= -0x40,
1989 fn as_bytes(&self) -> &[u8] {
1990 unsafe { mem::transmute(self) }
1993 fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
1994 pat.into_searcher(self).next_match().map(|(i, _)| i)
1997 fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>
1998 where P::Searcher: ReverseSearcher<'a>
2000 pat.into_searcher(self).next_match_back().map(|(i, _)| i)
2003 fn find_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
2008 fn split_at(&self, mid: usize) -> (&str, &str) {
2009 // is_char_boundary checks that the index is in [0, .len()]
2010 if self.is_char_boundary(mid) {
2012 (self.slice_unchecked(0, mid),
2013 self.slice_unchecked(mid, self.len()))
2016 slice_error_fail(self, 0, mid)
2020 fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str) {
2021 // is_char_boundary checks that the index is in [0, .len()]
2022 if self.is_char_boundary(mid) {
2023 let len = self.len();
2024 let ptr = self.as_ptr() as *mut u8;
2026 (from_raw_parts_mut(ptr, mid),
2027 from_raw_parts_mut(ptr.offset(mid as isize), len - mid))
2030 slice_error_fail(self, 0, mid)
2035 fn as_ptr(&self) -> *const u8 {
2036 self as *const str as *const u8
2040 fn len(&self) -> usize {
2041 self.as_bytes().len()
2045 fn is_empty(&self) -> bool { self.len() == 0 }
2048 fn parse<T: FromStr>(&self) -> Result<T, T::Err> { FromStr::from_str(self) }
2051 #[stable(feature = "rust1", since = "1.0.0")]
2052 impl AsRef<[u8]> for str {
2054 fn as_ref(&self) -> &[u8] {
2059 #[stable(feature = "rust1", since = "1.0.0")]
2060 impl<'a> Default for &'a str {
2061 /// Creates an empty str
2062 fn default() -> &'a str { "" }