1 // ignore-tidy-filelength
3 //! String manipulation.
5 //! For more details, see the [`std::str`] module.
7 //! [`std::str`]: ../../std/str/index.html
9 #![stable(feature = "rust1", since = "1.0.0")]
11 use self::pattern::Pattern;
12 use self::pattern::{DoubleEndedSearcher, ReverseSearcher, SearchStep, Searcher};
15 use crate::fmt::{self, Write};
16 use crate::iter::{Chain, FlatMap, Flatten};
17 use crate::iter::{Copied, Filter, FusedIterator, Map, TrustedLen, TrustedRandomAccess};
21 use crate::slice::{self, SliceIndex, Split as SliceSplit};
25 #[unstable(feature = "str_internals", issue = "none")]
26 #[allow(missing_docs)]
29 /// Parse a value from a string
31 /// `FromStr`'s [`from_str`] method is often used implicitly, through
32 /// [`str`]'s [`parse`] method. See [`parse`]'s documentation for examples.
34 /// [`from_str`]: #tymethod.from_str
35 /// [`str`]: ../../std/primitive.str.html
36 /// [`parse`]: ../../std/primitive.str.html#method.parse
38 /// `FromStr` does not have a lifetime parameter, and so you can only parse types
39 /// that do not contain a lifetime parameter themselves. In other words, you can
40 /// parse an `i32` with `FromStr`, but not a `&i32`. You can parse a struct that
41 /// contains an `i32`, but not one that contains an `&i32`.
45 /// Basic implementation of `FromStr` on an example `Point` type:
48 /// use std::str::FromStr;
49 /// use std::num::ParseIntError;
51 /// #[derive(Debug, PartialEq)]
57 /// impl FromStr for Point {
58 /// type Err = ParseIntError;
60 /// fn from_str(s: &str) -> Result<Self, Self::Err> {
61 /// let coords: Vec<&str> = s.trim_matches(|p| p == '(' || p == ')' )
65 /// let x_fromstr = coords[0].parse::<i32>()?;
66 /// let y_fromstr = coords[1].parse::<i32>()?;
68 /// Ok(Point { x: x_fromstr, y: y_fromstr })
72 /// let p = Point::from_str("(1,2)");
73 /// assert_eq!(p.unwrap(), Point{ x: 1, y: 2} )
75 #[stable(feature = "rust1", since = "1.0.0")]
76 pub trait FromStr: Sized {
77 /// The associated error which can be returned from parsing.
78 #[stable(feature = "rust1", since = "1.0.0")]
81 /// Parses a string `s` to return a value of this type.
83 /// If parsing succeeds, return the value inside [`Ok`]; otherwise,
84 /// when the string is ill-formatted, return an error inside [`Err`].
85 /// The error type is specific to the implementation of the trait.
87 /// [`Ok`]: ../../std/result/enum.Result.html#variant.Ok
88 /// [`Err`]: ../../std/result/enum.Result.html#variant.Err
92 /// Basic usage with [`i32`][ithirtytwo], a type that implements `FromStr`:
94 /// [ithirtytwo]: ../../std/primitive.i32.html
97 /// use std::str::FromStr;
100 /// let x = i32::from_str(s).unwrap();
102 /// assert_eq!(5, x);
104 #[stable(feature = "rust1", since = "1.0.0")]
105 fn from_str(s: &str) -> Result<Self, Self::Err>;
108 #[stable(feature = "rust1", since = "1.0.0")]
109 impl FromStr for bool {
110 type Err = ParseBoolError;
112 /// Parse a `bool` from a string.
114 /// Yields a `Result<bool, ParseBoolError>`, because `s` may or may not
115 /// actually be parseable.
120 /// use std::str::FromStr;
122 /// assert_eq!(FromStr::from_str("true"), Ok(true));
123 /// assert_eq!(FromStr::from_str("false"), Ok(false));
124 /// assert!(<bool as FromStr>::from_str("not even a boolean").is_err());
127 /// Note, in many cases, the `.parse()` method on `str` is more idiomatic.
130 /// assert_eq!("true".parse(), Ok(true));
131 /// assert_eq!("false".parse(), Ok(false));
132 /// assert!("not even a boolean".parse::<bool>().is_err());
135 fn from_str(s: &str) -> Result<bool, ParseBoolError> {
138 "false" => Ok(false),
139 _ => Err(ParseBoolError { _priv: () }),
144 /// An error returned when parsing a `bool` using [`from_str`] fails
146 /// [`from_str`]: ../../std/primitive.bool.html#method.from_str
147 #[derive(Debug, Clone, PartialEq, Eq)]
148 #[stable(feature = "rust1", since = "1.0.0")]
149 pub struct ParseBoolError {
153 #[stable(feature = "rust1", since = "1.0.0")]
154 impl fmt::Display for ParseBoolError {
155 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
156 "provided string was not `true` or `false`".fmt(f)
161 Section: Creating a string
164 /// Errors which can occur when attempting to interpret a sequence of [`u8`]
167 /// [`u8`]: ../../std/primitive.u8.html
169 /// As such, the `from_utf8` family of functions and methods for both [`String`]s
170 /// and [`&str`]s make use of this error, for example.
172 /// [`String`]: ../../std/string/struct.String.html#method.from_utf8
173 /// [`&str`]: ../../std/str/fn.from_utf8.html
177 /// This error type’s methods can be used to create functionality
178 /// similar to `String::from_utf8_lossy` without allocating heap memory:
181 /// fn from_utf8_lossy<F>(mut input: &[u8], mut push: F) where F: FnMut(&str) {
183 /// match std::str::from_utf8(input) {
189 /// let (valid, after_valid) = input.split_at(error.valid_up_to());
191 /// push(std::str::from_utf8_unchecked(valid))
193 /// push("\u{FFFD}");
195 /// if let Some(invalid_sequence_length) = error.error_len() {
196 /// input = &after_valid[invalid_sequence_length..]
205 #[derive(Copy, Eq, PartialEq, Clone, Debug)]
206 #[stable(feature = "rust1", since = "1.0.0")]
207 pub struct Utf8Error {
209 error_len: Option<u8>,
213 /// Returns the index in the given string up to which valid UTF-8 was
216 /// It is the maximum index such that `from_utf8(&input[..index])`
217 /// would return `Ok(_)`.
226 /// // some invalid bytes, in a vector
227 /// let sparkle_heart = vec![0, 159, 146, 150];
229 /// // std::str::from_utf8 returns a Utf8Error
230 /// let error = str::from_utf8(&sparkle_heart).unwrap_err();
232 /// // the second byte is invalid here
233 /// assert_eq!(1, error.valid_up_to());
235 #[stable(feature = "utf8_error", since = "1.5.0")]
236 pub fn valid_up_to(&self) -> usize {
240 /// Provides more information about the failure:
242 /// * `None`: the end of the input was reached unexpectedly.
243 /// `self.valid_up_to()` is 1 to 3 bytes from the end of the input.
244 /// If a byte stream (such as a file or a network socket) is being decoded incrementally,
245 /// this could be a valid `char` whose UTF-8 byte sequence is spanning multiple chunks.
247 /// * `Some(len)`: an unexpected byte was encountered.
248 /// The length provided is that of the invalid byte sequence
249 /// that starts at the index given by `valid_up_to()`.
250 /// Decoding should resume after that sequence
251 /// (after inserting a [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD]) in case of
254 /// [U+FFFD]: ../../std/char/constant.REPLACEMENT_CHARACTER.html
255 #[stable(feature = "utf8_error_error_len", since = "1.20.0")]
256 pub fn error_len(&self) -> Option<usize> {
257 self.error_len.map(|len| len as usize)
261 /// Converts a slice of bytes to a string slice.
263 /// A string slice ([`&str`]) is made of bytes ([`u8`]), and a byte slice
264 /// ([`&[u8]`][byteslice]) is made of bytes, so this function converts between
265 /// the two. Not all byte slices are valid string slices, however: [`&str`] requires
266 /// that it is valid UTF-8. `from_utf8()` checks to ensure that the bytes are valid
267 /// UTF-8, and then does the conversion.
269 /// [`&str`]: ../../std/primitive.str.html
270 /// [`u8`]: ../../std/primitive.u8.html
271 /// [byteslice]: ../../std/primitive.slice.html
273 /// If you are sure that the byte slice is valid UTF-8, and you don't want to
274 /// incur the overhead of the validity check, there is an unsafe version of
275 /// this function, [`from_utf8_unchecked`][fromutf8u], which has the same
276 /// behavior but skips the check.
278 /// [fromutf8u]: fn.from_utf8_unchecked.html
280 /// If you need a `String` instead of a `&str`, consider
281 /// [`String::from_utf8`][string].
283 /// [string]: ../../std/string/struct.String.html#method.from_utf8
285 /// Because you can stack-allocate a `[u8; N]`, and you can take a
286 /// [`&[u8]`][byteslice] of it, this function is one way to have a
287 /// stack-allocated string. There is an example of this in the
288 /// examples section below.
290 /// [byteslice]: ../../std/primitive.slice.html
294 /// Returns `Err` if the slice is not UTF-8 with a description as to why the
295 /// provided slice is not UTF-8.
304 /// // some bytes, in a vector
305 /// let sparkle_heart = vec![240, 159, 146, 150];
307 /// // We know these bytes are valid, so just use `unwrap()`.
308 /// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
310 /// assert_eq!("💖", sparkle_heart);
318 /// // some invalid bytes, in a vector
319 /// let sparkle_heart = vec![0, 159, 146, 150];
321 /// assert!(str::from_utf8(&sparkle_heart).is_err());
324 /// See the docs for [`Utf8Error`][error] for more details on the kinds of
325 /// errors that can be returned.
327 /// [error]: struct.Utf8Error.html
329 /// A "stack allocated string":
334 /// // some bytes, in a stack-allocated array
335 /// let sparkle_heart = [240, 159, 146, 150];
337 /// // We know these bytes are valid, so just use `unwrap()`.
338 /// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
340 /// assert_eq!("💖", sparkle_heart);
342 #[stable(feature = "rust1", since = "1.0.0")]
343 pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
344 run_utf8_validation(v)?;
345 // SAFETY: Just ran validation.
346 Ok(unsafe { from_utf8_unchecked(v) })
349 /// Converts a mutable slice of bytes to a mutable string slice.
358 /// // "Hello, Rust!" as a mutable vector
359 /// let mut hellorust = vec![72, 101, 108, 108, 111, 44, 32, 82, 117, 115, 116, 33];
361 /// // As we know these bytes are valid, we can use `unwrap()`
362 /// let outstr = str::from_utf8_mut(&mut hellorust).unwrap();
364 /// assert_eq!("Hello, Rust!", outstr);
372 /// // Some invalid bytes in a mutable vector
373 /// let mut invalid = vec![128, 223];
375 /// assert!(str::from_utf8_mut(&mut invalid).is_err());
377 /// See the docs for [`Utf8Error`][error] for more details on the kinds of
378 /// errors that can be returned.
380 /// [error]: struct.Utf8Error.html
381 #[stable(feature = "str_mut_extras", since = "1.20.0")]
382 pub fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
383 run_utf8_validation(v)?;
384 // SAFETY: Just ran validation.
385 Ok(unsafe { from_utf8_unchecked_mut(v) })
388 /// Converts a slice of bytes to a string slice without checking
389 /// that the string contains valid UTF-8.
391 /// See the safe version, [`from_utf8`][fromutf8], for more information.
393 /// [fromutf8]: fn.from_utf8.html
397 /// This function is unsafe because it does not check that the bytes passed to
398 /// it are valid UTF-8. If this constraint is violated, undefined behavior
399 /// results, as the rest of Rust assumes that [`&str`]s are valid UTF-8.
401 /// [`&str`]: ../../std/primitive.str.html
410 /// // some bytes, in a vector
411 /// let sparkle_heart = vec![240, 159, 146, 150];
413 /// let sparkle_heart = unsafe {
414 /// str::from_utf8_unchecked(&sparkle_heart)
417 /// assert_eq!("💖", sparkle_heart);
420 #[stable(feature = "rust1", since = "1.0.0")]
pub unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
    // SAFETY: the caller must guarantee that the bytes of `v` are valid UTF-8.
    // The pointer cast only reinterprets the `[u8]` slice as a `str`; the
    // address and length are carried over unchanged and nothing is validated.
    &*(v as *const [u8] as *const str)
}
425 /// Converts a slice of bytes to a string slice without checking
426 /// that the string contains valid UTF-8; mutable version.
428 /// See the immutable version, [`from_utf8_unchecked()`][fromutf8], for more information.
430 /// [fromutf8]: fn.from_utf8_unchecked.html
439 /// let mut heart = vec![240, 159, 146, 150];
440 /// let heart = unsafe { str::from_utf8_unchecked_mut(&mut heart) };
442 /// assert_eq!("💖", heart);
445 #[stable(feature = "str_mut_extras", since = "1.20.0")]
pub unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str {
    // SAFETY: the caller must guarantee that the bytes of `v` are valid UTF-8.
    // The pointer cast only reinterprets the `[u8]` slice as a `str`; the
    // address and length are carried over unchanged and nothing is validated.
    &mut *(v as *mut [u8] as *mut str)
}
450 #[stable(feature = "rust1", since = "1.0.0")]
451 impl fmt::Display for Utf8Error {
452 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
453 if let Some(error_len) = self.error_len {
456 "invalid utf-8 sequence of {} bytes from index {}",
457 error_len, self.valid_up_to
460 write!(f, "incomplete utf-8 byte sequence from index {}", self.valid_up_to)
469 /// An iterator over the [`char`]s of a string slice.
471 /// [`char`]: ../../std/primitive.char.html
473 /// This struct is created by the [`chars`] method on [`str`].
474 /// See its documentation for more.
476 /// [`chars`]: ../../std/primitive.str.html#method.chars
477 /// [`str`]: ../../std/primitive.str.html
479 #[stable(feature = "rust1", since = "1.0.0")]
480 pub struct Chars<'a> {
481 iter: slice::Iter<'a, u8>,
/// Returns the initial code point accumulator for the first byte of a
/// multi-byte sequence.
///
/// The first byte is special: only its low payload bits belong to the code
/// point, i.e. the bottom 5 bits for width 2, 4 bits for width 3, and 3 bits
/// for width 4. The mask `0x7F >> width` keeps exactly those bits.
#[inline]
fn utf8_first_byte(byte: u8, width: u32) -> u32 {
    u32::from(byte & (0x7F >> width))
}
492 /// Returns the value of `ch` updated with continuation byte `byte`.
494 fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
495 (ch << 6) | (byte & CONT_MASK) as u32
498 /// Checks whether the byte is a UTF-8 continuation byte (i.e., starts with the
501 fn utf8_is_cont_byte(byte: u8) -> bool {
502 (byte & !CONT_MASK) == TAG_CONT_U8
506 fn unwrap_or_0(opt: Option<&u8>) -> u8 {
513 /// Reads the next code point out of a byte iterator (assuming a
514 /// UTF-8-like encoding).
515 #[unstable(feature = "str_internals", issue = "none")]
517 pub fn next_code_point<'a, I: Iterator<Item = &'a u8>>(bytes: &mut I) -> Option<u32> {
519 let x = *bytes.next()?;
521 return Some(x as u32);
524 // Multibyte case follows
525 // Decode from a byte combination out of: [[[x y] z] w]
526 // NOTE: Performance is sensitive to the exact formulation here
527 let init = utf8_first_byte(x, 2);
528 let y = unwrap_or_0(bytes.next());
529 let mut ch = utf8_acc_cont_byte(init, y);
532 // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
533 let z = unwrap_or_0(bytes.next());
534 let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
535 ch = init << 12 | y_z;
538 // use only the lower 3 bits of `init`
539 let w = unwrap_or_0(bytes.next());
540 ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
547 /// Reads the last code point out of a byte iterator (assuming a
548 /// UTF-8-like encoding).
550 fn next_code_point_reverse<'a, I>(bytes: &mut I) -> Option<u32>
552 I: DoubleEndedIterator<Item = &'a u8>,
555 let w = match *bytes.next_back()? {
556 next_byte if next_byte < 128 => return Some(next_byte as u32),
557 back_byte => back_byte,
560 // Multibyte case follows
561 // Decode from a byte combination out of: [x [y [z w]]]
563 let z = unwrap_or_0(bytes.next_back());
564 ch = utf8_first_byte(z, 2);
565 if utf8_is_cont_byte(z) {
566 let y = unwrap_or_0(bytes.next_back());
567 ch = utf8_first_byte(y, 3);
568 if utf8_is_cont_byte(y) {
569 let x = unwrap_or_0(bytes.next_back());
570 ch = utf8_first_byte(x, 4);
571 ch = utf8_acc_cont_byte(ch, y);
573 ch = utf8_acc_cont_byte(ch, z);
575 ch = utf8_acc_cont_byte(ch, w);
580 #[stable(feature = "rust1", since = "1.0.0")]
581 impl<'a> Iterator for Chars<'a> {
585 fn next(&mut self) -> Option<char> {
586 next_code_point(&mut self.iter).map(|ch| {
587 // SAFETY: `str` invariant says `ch` is a valid Unicode Scalar Value.
588 unsafe { char::from_u32_unchecked(ch) }
593 fn count(self) -> usize {
594 // length in `char` is equal to the number of non-continuation bytes
595 let bytes_len = self.iter.len();
596 let mut cont_bytes = 0;
597 for &byte in self.iter {
598 cont_bytes += utf8_is_cont_byte(byte) as usize;
600 bytes_len - cont_bytes
604 fn size_hint(&self) -> (usize, Option<usize>) {
605 let len = self.iter.len();
606 // `(len + 3)` can't overflow, because we know that the `slice::Iter`
607 // belongs to a slice in memory which has a maximum length of
608 // `isize::MAX` (that's well below `usize::MAX`).
609 ((len + 3) / 4, Some(len))
613 fn last(mut self) -> Option<char> {
614 // No need to go through the entire string.
619 #[stable(feature = "chars_debug_impl", since = "1.38.0")]
620 impl fmt::Debug for Chars<'_> {
621 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
622 write!(f, "Chars(")?;
623 f.debug_list().entries(self.clone()).finish()?;
629 #[stable(feature = "rust1", since = "1.0.0")]
630 impl<'a> DoubleEndedIterator for Chars<'a> {
632 fn next_back(&mut self) -> Option<char> {
633 next_code_point_reverse(&mut self.iter).map(|ch| {
634 // SAFETY: `str` invariant says `ch` is a valid Unicode Scalar Value.
635 unsafe { char::from_u32_unchecked(ch) }
640 #[stable(feature = "fused", since = "1.26.0")]
641 impl FusedIterator for Chars<'_> {}
644 /// Views the underlying data as a subslice of the original data.
646 /// This has the same lifetime as the original slice, and so the
647 /// iterator can continue to be used while this exists.
652 /// let mut chars = "abc".chars();
654 /// assert_eq!(chars.as_str(), "abc");
656 /// assert_eq!(chars.as_str(), "bc");
659 /// assert_eq!(chars.as_str(), "");
661 #[stable(feature = "iter_to_slice", since = "1.4.0")]
663 pub fn as_str(&self) -> &'a str {
664 // SAFETY: `Chars` is only made from a str, which guarantees the iter is valid UTF-8.
665 unsafe { from_utf8_unchecked(self.iter.as_slice()) }
669 /// An iterator over the [`char`]s of a string slice, and their positions.
671 /// [`char`]: ../../std/primitive.char.html
673 /// This struct is created by the [`char_indices`] method on [`str`].
674 /// See its documentation for more.
676 /// [`char_indices`]: ../../std/primitive.str.html#method.char_indices
677 /// [`str`]: ../../std/primitive.str.html
678 #[derive(Clone, Debug)]
679 #[stable(feature = "rust1", since = "1.0.0")]
680 pub struct CharIndices<'a> {
685 #[stable(feature = "rust1", since = "1.0.0")]
686 impl<'a> Iterator for CharIndices<'a> {
687 type Item = (usize, char);
690 fn next(&mut self) -> Option<(usize, char)> {
691 let pre_len = self.iter.iter.len();
692 match self.iter.next() {
695 let index = self.front_offset;
696 let len = self.iter.iter.len();
697 self.front_offset += pre_len - len;
704 fn count(self) -> usize {
709 fn size_hint(&self) -> (usize, Option<usize>) {
710 self.iter.size_hint()
714 fn last(mut self) -> Option<(usize, char)> {
715 // No need to go through the entire string.
720 #[stable(feature = "rust1", since = "1.0.0")]
721 impl<'a> DoubleEndedIterator for CharIndices<'a> {
723 fn next_back(&mut self) -> Option<(usize, char)> {
724 self.iter.next_back().map(|ch| {
725 let index = self.front_offset + self.iter.iter.len();
731 #[stable(feature = "fused", since = "1.26.0")]
732 impl FusedIterator for CharIndices<'_> {}
734 impl<'a> CharIndices<'a> {
735 /// Views the underlying data as a subslice of the original data.
737 /// This has the same lifetime as the original slice, and so the
738 /// iterator can continue to be used while this exists.
739 #[stable(feature = "iter_to_slice", since = "1.4.0")]
741 pub fn as_str(&self) -> &'a str {
746 /// An iterator over the bytes of a string slice.
748 /// This struct is created by the [`bytes`] method on [`str`].
749 /// See its documentation for more.
751 /// [`bytes`]: ../../std/primitive.str.html#method.bytes
752 /// [`str`]: ../../std/primitive.str.html
753 #[stable(feature = "rust1", since = "1.0.0")]
754 #[derive(Clone, Debug)]
755 pub struct Bytes<'a>(Copied<slice::Iter<'a, u8>>);
757 #[stable(feature = "rust1", since = "1.0.0")]
758 impl Iterator for Bytes<'_> {
762 fn next(&mut self) -> Option<u8> {
767 fn size_hint(&self) -> (usize, Option<usize>) {
772 fn count(self) -> usize {
777 fn last(self) -> Option<Self::Item> {
782 fn nth(&mut self, n: usize) -> Option<Self::Item> {
787 fn all<F>(&mut self, f: F) -> bool
789 F: FnMut(Self::Item) -> bool,
795 fn any<F>(&mut self, f: F) -> bool
797 F: FnMut(Self::Item) -> bool,
803 fn find<P>(&mut self, predicate: P) -> Option<Self::Item>
805 P: FnMut(&Self::Item) -> bool,
807 self.0.find(predicate)
811 fn position<P>(&mut self, predicate: P) -> Option<usize>
813 P: FnMut(Self::Item) -> bool,
815 self.0.position(predicate)
819 fn rposition<P>(&mut self, predicate: P) -> Option<usize>
821 P: FnMut(Self::Item) -> bool,
823 self.0.rposition(predicate)
827 #[stable(feature = "rust1", since = "1.0.0")]
828 impl DoubleEndedIterator for Bytes<'_> {
830 fn next_back(&mut self) -> Option<u8> {
835 fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
840 fn rfind<P>(&mut self, predicate: P) -> Option<Self::Item>
842 P: FnMut(&Self::Item) -> bool,
844 self.0.rfind(predicate)
848 #[stable(feature = "rust1", since = "1.0.0")]
849 impl ExactSizeIterator for Bytes<'_> {
851 fn len(&self) -> usize {
856 fn is_empty(&self) -> bool {
861 #[stable(feature = "fused", since = "1.26.0")]
862 impl FusedIterator for Bytes<'_> {}
864 #[unstable(feature = "trusted_len", issue = "37572")]
865 unsafe impl TrustedLen for Bytes<'_> {}
868 unsafe impl TrustedRandomAccess for Bytes<'_> {
869 unsafe fn get_unchecked(&mut self, i: usize) -> u8 {
870 self.0.get_unchecked(i)
872 fn may_have_side_effect() -> bool {
877 /// This macro generates a Clone impl for string pattern API
878 /// wrapper types of the form X<'a, P>
879 macro_rules! derive_pattern_clone {
880 (clone $t:ident with |$s:ident| $e:expr) => {
881 impl<'a, P> Clone for $t<'a, P>
883 P: Pattern<'a, Searcher: Clone>,
885 fn clone(&self) -> Self {
893 /// This macro generates two public iterator structs
894 /// wrapping a private internal one that makes use of the `Pattern` API.
896 /// For all patterns `P: Pattern<'a>` the following items will be
897 /// generated (generics omitted):
899 /// struct $forward_iterator($internal_iterator);
900 /// struct $reverse_iterator($internal_iterator);
902 /// impl Iterator for $forward_iterator
903 /// { /* internal ends up calling Searcher::next_match() */ }
905 /// impl DoubleEndedIterator for $forward_iterator
906 /// where P::Searcher: DoubleEndedSearcher
907 /// { /* internal ends up calling Searcher::next_match_back() */ }
909 /// impl Iterator for $reverse_iterator
910 /// where P::Searcher: ReverseSearcher
911 /// { /* internal ends up calling Searcher::next_match_back() */ }
913 /// impl DoubleEndedIterator for $reverse_iterator
914 /// where P::Searcher: DoubleEndedSearcher
915 /// { /* internal ends up calling Searcher::next_match() */ }
917 /// The internal one is defined outside the macro, and has almost the same
918 /// semantics as a `DoubleEndedIterator` by delegating to `pattern::Searcher` and
919 /// `pattern::ReverseSearcher` for both forward and reverse iteration.
921 /// "Almost", because a `Searcher` and a `ReverseSearcher` for a given
922 /// `Pattern` might not return the same elements, so actually implementing
923 /// `DoubleEndedIterator` for it would be incorrect.
924 /// (See the docs in `str::pattern` for more details)
926 /// However, the internal struct still represents a single ended iterator from
927 /// either end, and depending on the pattern is also a valid double ended iterator,
928 /// so the two wrapper structs implement `Iterator`
929 /// and `DoubleEndedIterator` depending on the concrete pattern type, leading
930 /// to the complex impls seen above.
931 macro_rules! generate_pattern_iterators {
935 $(#[$forward_iterator_attribute:meta])*
936 struct $forward_iterator:ident;
940 $(#[$reverse_iterator_attribute:meta])*
941 struct $reverse_iterator:ident;
943 // Stability of all generated items
945 $(#[$common_stability_attribute:meta])*
947 // Internal almost-iterator that is being delegated to
949 $internal_iterator:ident yielding ($iterty:ty);
951 // Kind of delegation - either single ended or double ended
954 $(#[$forward_iterator_attribute])*
955 $(#[$common_stability_attribute])*
956 pub struct $forward_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);
958 $(#[$common_stability_attribute])*
959 impl<'a, P> fmt::Debug for $forward_iterator<'a, P>
961 P: Pattern<'a, Searcher: fmt::Debug>,
963 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
964 f.debug_tuple(stringify!($forward_iterator))
970 $(#[$common_stability_attribute])*
971 impl<'a, P: Pattern<'a>> Iterator for $forward_iterator<'a, P> {
975 fn next(&mut self) -> Option<$iterty> {
980 $(#[$common_stability_attribute])*
981 impl<'a, P> Clone for $forward_iterator<'a, P>
983 P: Pattern<'a, Searcher: Clone>,
985 fn clone(&self) -> Self {
986 $forward_iterator(self.0.clone())
990 $(#[$reverse_iterator_attribute])*
991 $(#[$common_stability_attribute])*
992 pub struct $reverse_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);
994 $(#[$common_stability_attribute])*
995 impl<'a, P> fmt::Debug for $reverse_iterator<'a, P>
997 P: Pattern<'a, Searcher: fmt::Debug>,
999 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1000 f.debug_tuple(stringify!($reverse_iterator))
1006 $(#[$common_stability_attribute])*
1007 impl<'a, P> Iterator for $reverse_iterator<'a, P>
1009 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
1011 type Item = $iterty;
1014 fn next(&mut self) -> Option<$iterty> {
1019 $(#[$common_stability_attribute])*
1020 impl<'a, P> Clone for $reverse_iterator<'a, P>
1022 P: Pattern<'a, Searcher: Clone>,
1024 fn clone(&self) -> Self {
1025 $reverse_iterator(self.0.clone())
1029 #[stable(feature = "fused", since = "1.26.0")]
1030 impl<'a, P: Pattern<'a>> FusedIterator for $forward_iterator<'a, P> {}
1032 #[stable(feature = "fused", since = "1.26.0")]
1033 impl<'a, P> FusedIterator for $reverse_iterator<'a, P>
1035 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
1038 generate_pattern_iterators!($($t)* with $(#[$common_stability_attribute])*,
1040 $reverse_iterator, $iterty);
1043 double ended; with $(#[$common_stability_attribute:meta])*,
1044 $forward_iterator:ident,
1045 $reverse_iterator:ident, $iterty:ty
1047 $(#[$common_stability_attribute])*
1048 impl<'a, P> DoubleEndedIterator for $forward_iterator<'a, P>
1050 P: Pattern<'a, Searcher: DoubleEndedSearcher<'a>>,
1053 fn next_back(&mut self) -> Option<$iterty> {
1058 $(#[$common_stability_attribute])*
1059 impl<'a, P> DoubleEndedIterator for $reverse_iterator<'a, P>
1061 P: Pattern<'a, Searcher: DoubleEndedSearcher<'a>>,
1064 fn next_back(&mut self) -> Option<$iterty> {
1070 single ended; with $(#[$common_stability_attribute:meta])*,
1071 $forward_iterator:ident,
1072 $reverse_iterator:ident, $iterty:ty
1076 derive_pattern_clone! {
1078 with |s| SplitInternal { matcher: s.matcher.clone(), ..*s }
1081 struct SplitInternal<'a, P: Pattern<'a>> {
1084 matcher: P::Searcher,
1085 allow_trailing_empty: bool,
1089 impl<'a, P> fmt::Debug for SplitInternal<'a, P>
1091 P: Pattern<'a, Searcher: fmt::Debug>,
1093 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1094 f.debug_struct("SplitInternal")
1095 .field("start", &self.start)
1096 .field("end", &self.end)
1097 .field("matcher", &self.matcher)
1098 .field("allow_trailing_empty", &self.allow_trailing_empty)
1099 .field("finished", &self.finished)
1104 impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
1106 fn get_end(&mut self) -> Option<&'a str> {
1107 if !self.finished && (self.allow_trailing_empty || self.end - self.start > 0) {
1108 self.finished = true;
1109 // SAFETY: `self.start` and `self.end` always lie on unicode boundaries.
1111 let string = self.matcher.haystack().get_unchecked(self.start..self.end);
1120 fn next(&mut self) -> Option<&'a str> {
1125 let haystack = self.matcher.haystack();
1126 match self.matcher.next_match() {
1127 // SAFETY: `Searcher` guarantees that `a` and `b` lie on unicode boundaries.
1128 Some((a, b)) => unsafe {
1129 let elt = haystack.get_unchecked(self.start..a);
1133 None => self.get_end(),
1138 fn next_inclusive(&mut self) -> Option<&'a str> {
1143 let haystack = self.matcher.haystack();
1144 match self.matcher.next_match() {
1145 // SAFETY: `Searcher` guarantees that `b` lies on unicode boundary,
1146 // and self.start is either the start of the original string,
1147 // or `b` was assigned to it, so it also lies on unicode boundary.
1148 Some((_, b)) => unsafe {
1149 let elt = haystack.get_unchecked(self.start..b);
1153 None => self.get_end(),
1158 fn next_back(&mut self) -> Option<&'a str>
1160 P::Searcher: ReverseSearcher<'a>,
1166 if !self.allow_trailing_empty {
1167 self.allow_trailing_empty = true;
1168 match self.next_back() {
1169 Some(elt) if !elt.is_empty() => return Some(elt),
1178 let haystack = self.matcher.haystack();
1179 match self.matcher.next_match_back() {
1180 // SAFETY: `Searcher` guarantees that `a` and `b` lie on unicode boundaries.
1181 Some((a, b)) => unsafe {
1182 let elt = haystack.get_unchecked(b..self.end);
1186 // SAFETY: `self.start` and `self.end` always lie on unicode boundaries.
1188 self.finished = true;
1189 Some(haystack.get_unchecked(self.start..self.end))
1195 fn next_back_inclusive(&mut self) -> Option<&'a str>
1197 P::Searcher: ReverseSearcher<'a>,
1203 if !self.allow_trailing_empty {
1204 self.allow_trailing_empty = true;
1205 match self.next_back_inclusive() {
1206 Some(elt) if !elt.is_empty() => return Some(elt),
1215 let haystack = self.matcher.haystack();
1216 match self.matcher.next_match_back() {
1217 // SAFETY: `Searcher` guarantees that `b` lies on unicode boundary,
1218 // and self.end is either the end of the original string,
1219 // or `b` was assigned to it, so it also lies on unicode boundary.
1220 Some((_, b)) => unsafe {
1221 let elt = haystack.get_unchecked(b..self.end);
1225 // SAFETY: self.start is either the start of the original string,
1226 // or start of a substring that represents the part of the string that hasn't
1227 // been iterated yet. Either way, it is guaranteed to lie on unicode boundary.
1228 // self.end is either the end of the original string,
1229 // or `b` was assigned to it, so it also lies on unicode boundary.
1231 self.finished = true;
1232 Some(haystack.get_unchecked(self.start..self.end))
1238 generate_pattern_iterators! {
1240 /// Created with the method [`split`].
1242 /// [`split`]: ../../std/primitive.str.html#method.split
1245 /// Created with the method [`rsplit`].
1247 /// [`rsplit`]: ../../std/primitive.str.html#method.rsplit
1250 #[stable(feature = "rust1", since = "1.0.0")]
1252 SplitInternal yielding (&'a str);
1253 delegate double ended;
1256 generate_pattern_iterators! {
1258 /// Created with the method [`split_terminator`].
1260 /// [`split_terminator`]: ../../std/primitive.str.html#method.split_terminator
1261 struct SplitTerminator;
1263 /// Created with the method [`rsplit_terminator`].
1265 /// [`rsplit_terminator`]: ../../std/primitive.str.html#method.rsplit_terminator
1266 struct RSplitTerminator;
1268 #[stable(feature = "rust1", since = "1.0.0")]
1270 SplitInternal yielding (&'a str);
1271 delegate double ended;
1274 derive_pattern_clone! {
1275 clone SplitNInternal
1276 with |s| SplitNInternal { iter: s.iter.clone(), ..*s }
1279 struct SplitNInternal<'a, P: Pattern<'a>> {
1280 iter: SplitInternal<'a, P>,
1281 /// The number of splits remaining
1285 impl<'a, P> fmt::Debug for SplitNInternal<'a, P>
1287 P: Pattern<'a, Searcher: fmt::Debug>,
1289 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1290 f.debug_struct("SplitNInternal")
1291 .field("iter", &self.iter)
1292 .field("count", &self.count)
1297 impl<'a, P: Pattern<'a>> SplitNInternal<'a, P> {
1299 fn next(&mut self) -> Option<&'a str> {
1314 fn next_back(&mut self) -> Option<&'a str>
1316 P::Searcher: ReverseSearcher<'a>,
1326 self.iter.next_back()
1332 generate_pattern_iterators! {
1334 /// Created with the method [`splitn`].
1336 /// [`splitn`]: ../../std/primitive.str.html#method.splitn
1339 /// Created with the method [`rsplitn`].
1341 /// [`rsplitn`]: ../../std/primitive.str.html#method.rsplitn
1344 #[stable(feature = "rust1", since = "1.0.0")]
1346 SplitNInternal yielding (&'a str);
1347 delegate single ended;
1350 derive_pattern_clone! {
1351 clone MatchIndicesInternal
1352 with |s| MatchIndicesInternal(s.0.clone())
1355 struct MatchIndicesInternal<'a, P: Pattern<'a>>(P::Searcher);
1357 impl<'a, P> fmt::Debug for MatchIndicesInternal<'a, P>
1359 P: Pattern<'a, Searcher: fmt::Debug>,
1361 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1362 f.debug_tuple("MatchIndicesInternal").field(&self.0).finish()
1366 impl<'a, P: Pattern<'a>> MatchIndicesInternal<'a, P> {
1368 fn next(&mut self) -> Option<(usize, &'a str)> {
1371 // SAFETY: `Searcher` guarantees that `start` and `end` lie on unicode boundaries.
1372 .map(|(start, end)| unsafe { (start, self.0.haystack().get_unchecked(start..end)) })
1376 fn next_back(&mut self) -> Option<(usize, &'a str)>
1378 P::Searcher: ReverseSearcher<'a>,
1382 // SAFETY: `Searcher` guarantees that `start` and `end` lie on unicode boundaries.
1383 .map(|(start, end)| unsafe { (start, self.0.haystack().get_unchecked(start..end)) })
1387 generate_pattern_iterators! {
1389 /// Created with the method [`match_indices`].
1391 /// [`match_indices`]: ../../std/primitive.str.html#method.match_indices
1392 struct MatchIndices;
1394 /// Created with the method [`rmatch_indices`].
1396 /// [`rmatch_indices`]: ../../std/primitive.str.html#method.rmatch_indices
1397 struct RMatchIndices;
1399 #[stable(feature = "str_match_indices", since = "1.5.0")]
1401 MatchIndicesInternal yielding ((usize, &'a str));
1402 delegate double ended;
1405 derive_pattern_clone! {
1406 clone MatchesInternal
1407 with |s| MatchesInternal(s.0.clone())
1410 struct MatchesInternal<'a, P: Pattern<'a>>(P::Searcher);
1412 impl<'a, P> fmt::Debug for MatchesInternal<'a, P>
1414 P: Pattern<'a, Searcher: fmt::Debug>,
1416 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1417 f.debug_tuple("MatchesInternal").field(&self.0).finish()
1421 impl<'a, P: Pattern<'a>> MatchesInternal<'a, P> {
1423 fn next(&mut self) -> Option<&'a str> {
1424 // SAFETY: `Searcher` guarantees that `start` and `end` lie on unicode boundaries.
1425 self.0.next_match().map(|(a, b)| unsafe {
1426 // Indices are known to be on utf8 boundaries
1427 self.0.haystack().get_unchecked(a..b)
1432 fn next_back(&mut self) -> Option<&'a str>
1434 P::Searcher: ReverseSearcher<'a>,
1436 // SAFETY: `Searcher` guarantees that `start` and `end` lie on unicode boundaries.
1437 self.0.next_match_back().map(|(a, b)| unsafe {
1438 // Indices are known to be on utf8 boundaries
1439 self.0.haystack().get_unchecked(a..b)
1444 generate_pattern_iterators! {
1446 /// Created with the method [`matches`].
1448 /// [`matches`]: ../../std/primitive.str.html#method.matches
1451 /// Created with the method [`rmatches`].
1453 /// [`rmatches`]: ../../std/primitive.str.html#method.rmatches
1456 #[stable(feature = "str_matches", since = "1.2.0")]
1458 MatchesInternal yielding (&'a str);
1459 delegate double ended;
1462 /// An iterator over the lines of a string, as string slices.
1464 /// This struct is created with the [`lines`] method on [`str`].
1465 /// See its documentation for more.
1467 /// [`lines`]: ../../std/primitive.str.html#method.lines
1468 /// [`str`]: ../../std/primitive.str.html
1469 #[stable(feature = "rust1", since = "1.0.0")]
1470 #[derive(Clone, Debug)]
1471 pub struct Lines<'a>(Map<SplitTerminator<'a, char>, LinesAnyMap>);
1473 #[stable(feature = "rust1", since = "1.0.0")]
1474 impl<'a> Iterator for Lines<'a> {
1475 type Item = &'a str;
1478 fn next(&mut self) -> Option<&'a str> {
1483 fn size_hint(&self) -> (usize, Option<usize>) {
1488 fn last(mut self) -> Option<&'a str> {
1493 #[stable(feature = "rust1", since = "1.0.0")]
1494 impl<'a> DoubleEndedIterator for Lines<'a> {
1496 fn next_back(&mut self) -> Option<&'a str> {
1501 #[stable(feature = "fused", since = "1.26.0")]
1502 impl FusedIterator for Lines<'_> {}
1504 /// Created with the method [`lines_any`].
1506 /// [`lines_any`]: ../../std/primitive.str.html#method.lines_any
1507 #[stable(feature = "rust1", since = "1.0.0")]
1508 #[rustc_deprecated(since = "1.4.0", reason = "use lines()/Lines instead now")]
1509 #[derive(Clone, Debug)]
1510 #[allow(deprecated)]
1511 pub struct LinesAny<'a>(Lines<'a>);
1514 /// A nameable, cloneable fn type
1516 struct LinesAnyMap impl<'a> Fn = |line: &'a str| -> &'a str {
1518 if l > 0 && line.as_bytes()[l - 1] == b'\r' { &line[0 .. l - 1] }
1523 #[stable(feature = "rust1", since = "1.0.0")]
1524 #[allow(deprecated)]
1525 impl<'a> Iterator for LinesAny<'a> {
1526 type Item = &'a str;
1529 fn next(&mut self) -> Option<&'a str> {
1534 fn size_hint(&self) -> (usize, Option<usize>) {
1539 #[stable(feature = "rust1", since = "1.0.0")]
1540 #[allow(deprecated)]
1541 impl<'a> DoubleEndedIterator for LinesAny<'a> {
1543 fn next_back(&mut self) -> Option<&'a str> {
1548 #[stable(feature = "fused", since = "1.26.0")]
1549 #[allow(deprecated)]
1550 impl FusedIterator for LinesAny<'_> {}
1553 Section: UTF-8 validation
1556 // use truncation to fit u64 into usize
1557 const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;
1559 /// Returns `true` if any byte in the word `x` is non-ASCII (>= 128).
1561 fn contains_nonascii(x: usize) -> bool {
// `NONASCII_MASK` has the high bit (0x80) of every byte set, so a non-zero
// AND means at least one byte of `x` has its high bit set, i.e. is >= 128.
1562 (x & NONASCII_MASK) != 0
1565 /// Walks through `v` checking that it's a valid UTF-8 sequence,
1566 /// returning `Ok(())` in that case, or, if it is invalid, `Err(err)`.
1568 fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
1572 let usize_bytes = mem::size_of::<usize>();
1573 let ascii_block_size = 2 * usize_bytes;
1574 let blocks_end = if len >= ascii_block_size { len - ascii_block_size + 1 } else { 0 };
1575 let align = v.as_ptr().align_offset(usize_bytes);
1578 let old_offset = index;
1580 ($error_len: expr) => {
1581 return Err(Utf8Error { valid_up_to: old_offset, error_len: $error_len });
1588 // we needed data, but there was none: error!
1596 let first = v[index];
1598 let w = UTF8_CHAR_WIDTH[first as usize];
1599 // 2-byte encoding is for codepoints \u{0080} to \u{07ff}
1600 // first C2 80 last DF BF
1601 // 3-byte encoding is for codepoints \u{0800} to \u{ffff}
1602 // first E0 A0 80 last EF BF BF
1603 // excluding surrogate codepoints \u{d800} to \u{dfff}
1604 // ED A0 80 to ED BF BF
1605 // 4-byte encoding is for codepoints \u{10000} to \u{10ffff}
1606 // first F0 90 80 80 last F4 8F BF BF
1608 // Use the UTF-8 syntax from the RFC
1610 // https://tools.ietf.org/html/rfc3629
1612 // UTF8-2 = %xC2-DF UTF8-tail
1613 // UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
1614 // %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
1615 // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
1616 // %xF4 %x80-8F 2( UTF8-tail )
1619 if next!() & !CONT_MASK != TAG_CONT_U8 {
1624 match (first, next!()) {
1626 | (0xE1..=0xEC, 0x80..=0xBF)
1627 | (0xED, 0x80..=0x9F)
1628 | (0xEE..=0xEF, 0x80..=0xBF) => {}
1631 if next!() & !CONT_MASK != TAG_CONT_U8 {
1636 match (first, next!()) {
1637 (0xF0, 0x90..=0xBF) | (0xF1..=0xF3, 0x80..=0xBF) | (0xF4, 0x80..=0x8F) => {}
1640 if next!() & !CONT_MASK != TAG_CONT_U8 {
1643 if next!() & !CONT_MASK != TAG_CONT_U8 {
1651 // Ascii case, try to skip forward quickly.
1652 // When the pointer is aligned, read 2 words of data per iteration
1653 // until we find a word containing a non-ascii byte.
// `align_offset` returns `usize::max_value()` when the pointer cannot be
// aligned; the word-wise fast path is skipped entirely in that case.
1654 if align != usize::max_value() && align.wrapping_sub(index) % usize_bytes == 0 {
1655 let ptr = v.as_ptr();
1656 while index < blocks_end {
1657 // SAFETY: since `align - index` and `ascii_block_size` are
1658 // multiples of `usize_bytes`, `block = ptr.add(index)` is
1659 // always aligned with a `usize` so it's safe to dereference
1660 // both `block` and `block.offset(1)`.
// `index < blocks_end` also implies `index + ascii_block_size <= len`,
// so both word reads stay below offset `len`.
1662 let block = ptr.add(index) as *const usize;
1663 // break if there is a nonascii byte
1664 let zu = contains_nonascii(*block);
1665 let zv = contains_nonascii(*block.offset(1));
1670 index += ascii_block_size;
1672 // step from the point where the wordwise loop stopped
1673 while index < len && v[index] < 128 {
1685 // https://tools.ietf.org/html/rfc3629
1686 static UTF8_CHAR_WIDTH: [u8; 256] = [
1687 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1689 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1691 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1693 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1695 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1697 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1699 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1701 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xEF
1702 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xFF
1705 /// Given a first byte, determines how many bytes are in this UTF-8 character.
1706 #[unstable(feature = "str_internals", issue = "none")]
1708 pub fn utf8_char_width(b: u8) -> usize {
1709 UTF8_CHAR_WIDTH[b as usize] as usize
1712 /// Mask of the value bits of a continuation byte.
1713 const CONT_MASK: u8 = 0b0011_1111;
1714 /// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte.
1715 const TAG_CONT_U8: u8 = 0b1000_0000;
1718 Section: Trait implementations
1722 use crate::cmp::Ordering;
1724 use crate::slice::{self, SliceIndex};
1726 /// Implements ordering of strings.
1728 /// Strings are ordered lexicographically by their byte values. This orders Unicode code
1729 /// points based on their positions in the code charts. This is not necessarily the same as
1730 /// "alphabetical" order, which varies by language and locale. Sorting strings according to
1731 /// culturally-accepted standards requires locale-specific data that is outside the scope of
1733 #[stable(feature = "rust1", since = "1.0.0")]
1736 fn cmp(&self, other: &str) -> Ordering {
1737 self.as_bytes().cmp(other.as_bytes())
1741 #[stable(feature = "rust1", since = "1.0.0")]
1742 impl PartialEq for str {
1744 fn eq(&self, other: &str) -> bool {
1745 self.as_bytes() == other.as_bytes()
1748 fn ne(&self, other: &str) -> bool {
1753 #[stable(feature = "rust1", since = "1.0.0")]
1756 /// Implements comparison operations on strings.
1758 /// Strings are compared lexicographically by their byte values. This compares Unicode code
1759 /// points based on their positions in the code charts. This is not necessarily the same as
1760 /// "alphabetical" order, which varies by language and locale. Comparing strings according to
1761 /// culturally-accepted standards requires locale-specific data that is outside the scope of
1763 #[stable(feature = "rust1", since = "1.0.0")]
1764 impl PartialOrd for str {
1766 fn partial_cmp(&self, other: &str) -> Option<Ordering> {
1767 Some(self.cmp(other))
1771 #[stable(feature = "rust1", since = "1.0.0")]
1772 impl<I> ops::Index<I> for str
1776 type Output = I::Output;
1779 fn index(&self, index: I) -> &I::Output {
1784 #[stable(feature = "rust1", since = "1.0.0")]
1785 impl<I> ops::IndexMut<I> for str
1790 fn index_mut(&mut self, index: I) -> &mut I::Output {
1791 index.index_mut(self)
// Diverging panic helper for the inclusive-range (`..=`) `index`/`index_mut`
// impls in this file. They call it when the inclusive end equals
// `usize::max_value()`, because the `end + 1` they would otherwise compute
// cannot be represented.
1797 fn str_index_overflow_fail() -> ! {
1798 panic!("attempted to index str up to maximum usize");
1801 /// Implements substring slicing with syntax `&self[..]` or `&mut self[..]`.
1803 /// Returns a slice of the whole string, i.e., returns `&self` or `&mut
1804 /// self`. Equivalent to `&self[0 .. len]` or `&mut self[0 .. len]`. Unlike
1805 /// other indexing operations, this can never panic.
1807 /// This operation is `O(1)`.
1809 /// Prior to 1.20.0, these indexing operations were still supported by
1810 /// direct implementation of `Index` and `IndexMut`.
1812 /// Equivalent to `&self[0 .. len]` or `&mut self[0 .. len]`.
1813 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
1814 impl SliceIndex<str> for ops::RangeFull {
1817 fn get(self, slice: &str) -> Option<&Self::Output> {
1821 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
1825 unsafe fn get_unchecked(self, slice: &str) -> &Self::Output {
1829 unsafe fn get_unchecked_mut(self, slice: &mut str) -> &mut Self::Output {
1833 fn index(self, slice: &str) -> &Self::Output {
1837 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
1842 /// Implements substring slicing with syntax `&self[begin .. end]` or `&mut
1843 /// self[begin .. end]`.
1845 /// Returns a slice of the given string from the byte range
1846 /// [`begin`, `end`).
1848 /// This operation is `O(1)`.
1850 /// Prior to 1.20.0, these indexing operations were still supported by
1851 /// direct implementation of `Index` and `IndexMut`.
1855 /// Panics if `begin` or `end` does not point to the starting byte offset of
1856 /// a character (as defined by `is_char_boundary`), if `begin > end`, or if
1862 /// let s = "Löwe 老虎 Léopard";
1863 /// assert_eq!(&s[0 .. 1], "L");
1865 /// assert_eq!(&s[1 .. 9], "öwe 老");
1867 /// // these will panic:
1868 /// // byte 2 lies within `ö`:
1871 /// // byte 8 lies within `老`
1874 /// // byte 100 is outside the string
1875 /// // &s[3 .. 100];
1877 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
1878 impl SliceIndex<str> for ops::Range<usize> {
1881 fn get(self, slice: &str) -> Option<&Self::Output> {
1882 if self.start <= self.end
1883 && slice.is_char_boundary(self.start)
1884 && slice.is_char_boundary(self.end)
1886 // SAFETY: just checked that `start` and `end` are on a char boundary.
1887 Some(unsafe { self.get_unchecked(slice) })
1893 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
1894 if self.start <= self.end
1895 && slice.is_char_boundary(self.start)
1896 && slice.is_char_boundary(self.end)
1898 // SAFETY: just checked that `start` and `end` are on a char boundary.
1899 Some(unsafe { self.get_unchecked_mut(slice) })
1905 unsafe fn get_unchecked(self, slice: &str) -> &Self::Output {
1906 let ptr = slice.as_ptr().add(self.start);
1907 let len = self.end - self.start;
1908 super::from_utf8_unchecked(slice::from_raw_parts(ptr, len))
1911 unsafe fn get_unchecked_mut(self, slice: &mut str) -> &mut Self::Output {
1912 let ptr = slice.as_mut_ptr().add(self.start);
1913 let len = self.end - self.start;
1914 super::from_utf8_unchecked_mut(slice::from_raw_parts_mut(ptr, len))
1917 fn index(self, slice: &str) -> &Self::Output {
1918 let (start, end) = (self.start, self.end);
1919 self.get(slice).unwrap_or_else(|| super::slice_error_fail(slice, start, end))
1922 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
1923 // is_char_boundary checks that the index is in [0, .len()]
1924 // cannot reuse `get` as above, because of NLL trouble
1925 if self.start <= self.end
1926 && slice.is_char_boundary(self.start)
1927 && slice.is_char_boundary(self.end)
1929 // SAFETY: just checked that `start` and `end` are on a char boundary.
1930 unsafe { self.get_unchecked_mut(slice) }
1932 super::slice_error_fail(slice, self.start, self.end)
1937 /// Implements substring slicing with syntax `&self[.. end]` or `&mut
1940 /// Returns a slice of the given string from the byte range [`0`, `end`).
1941 /// Equivalent to `&self[0 .. end]` or `&mut self[0 .. end]`.
1943 /// This operation is `O(1)`.
1945 /// Prior to 1.20.0, these indexing operations were still supported by
1946 /// direct implementation of `Index` and `IndexMut`.
1950 /// Panics if `end` does not point to the starting byte offset of a
1951 /// character (as defined by `is_char_boundary`), or if `end > len`.
1952 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
1953 impl SliceIndex<str> for ops::RangeTo<usize> {
1956 fn get(self, slice: &str) -> Option<&Self::Output> {
1957 if slice.is_char_boundary(self.end) {
1958 // SAFETY: just checked that `end` is on a char boundary.
1959 Some(unsafe { self.get_unchecked(slice) })
1965 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
1966 if slice.is_char_boundary(self.end) {
1967 // SAFETY: just checked that `end` is on a char boundary.
1968 Some(unsafe { self.get_unchecked_mut(slice) })
1974 unsafe fn get_unchecked(self, slice: &str) -> &Self::Output {
1975 let ptr = slice.as_ptr();
1976 super::from_utf8_unchecked(slice::from_raw_parts(ptr, self.end))
1979 unsafe fn get_unchecked_mut(self, slice: &mut str) -> &mut Self::Output {
1980 let ptr = slice.as_mut_ptr();
1981 super::from_utf8_unchecked_mut(slice::from_raw_parts_mut(ptr, self.end))
1984 fn index(self, slice: &str) -> &Self::Output {
1986 self.get(slice).unwrap_or_else(|| super::slice_error_fail(slice, 0, end))
1989 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
1990 if slice.is_char_boundary(self.end) {
1991 // SAFETY: just checked that `end` is on a char boundary.
1992 unsafe { self.get_unchecked_mut(slice) }
1994 super::slice_error_fail(slice, 0, self.end)
1999 /// Implements substring slicing with syntax `&self[begin ..]` or `&mut
2000 /// self[begin ..]`.
2002 /// Returns a slice of the given string from the byte range [`begin`,
2003 /// `len`). Equivalent to `&self[begin .. len]` or `&mut self[begin ..
2006 /// This operation is `O(1)`.
2008 /// Prior to 1.20.0, these indexing operations were still supported by
2009 /// direct implementation of `Index` and `IndexMut`.
2013 /// Panics if `begin` does not point to the starting byte offset of
2014 /// a character (as defined by `is_char_boundary`), or if `begin > len`.
2015 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
2016 impl SliceIndex<str> for ops::RangeFrom<usize> {
2019 fn get(self, slice: &str) -> Option<&Self::Output> {
2020 if slice.is_char_boundary(self.start) {
2021 // SAFETY: just checked that `start` is on a char boundary.
2022 Some(unsafe { self.get_unchecked(slice) })
2028 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
2029 if slice.is_char_boundary(self.start) {
2030 // SAFETY: just checked that `start` is on a char boundary.
2031 Some(unsafe { self.get_unchecked_mut(slice) })
2037 unsafe fn get_unchecked(self, slice: &str) -> &Self::Output {
2038 let ptr = slice.as_ptr().add(self.start);
2039 let len = slice.len() - self.start;
2040 super::from_utf8_unchecked(slice::from_raw_parts(ptr, len))
2043 unsafe fn get_unchecked_mut(self, slice: &mut str) -> &mut Self::Output {
2044 let ptr = slice.as_mut_ptr().add(self.start);
2045 let len = slice.len() - self.start;
2046 super::from_utf8_unchecked_mut(slice::from_raw_parts_mut(ptr, len))
2049 fn index(self, slice: &str) -> &Self::Output {
2050 let (start, end) = (self.start, slice.len());
2051 self.get(slice).unwrap_or_else(|| super::slice_error_fail(slice, start, end))
2054 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
2055 if slice.is_char_boundary(self.start) {
2056 // SAFETY: just checked that `start` is on a char boundary.
2057 unsafe { self.get_unchecked_mut(slice) }
2059 super::slice_error_fail(slice, self.start, slice.len())
2064 /// Implements substring slicing with syntax `&self[begin ..= end]` or `&mut
2065 /// self[begin ..= end]`.
2067 /// Returns a slice of the given string from the byte range
2068 /// [`begin`, `end`]. Equivalent to `&self[begin .. end + 1]` or `&mut
2069 /// self[begin .. end + 1]`, except if `end` has the maximum value for
2072 /// This operation is `O(1)`.
2076 /// Panics if `begin` does not point to the starting byte offset of
2077 /// a character (as defined by `is_char_boundary`), if `end` does not point
2078 /// to the ending byte offset of a character (`end + 1` is either a starting
2079 /// byte offset or equal to `len`), if `begin > end`, or if `end >= len`.
2080 #[stable(feature = "inclusive_range", since = "1.26.0")]
2081 impl SliceIndex<str> for ops::RangeInclusive<usize> {
2084 fn get(self, slice: &str) -> Option<&Self::Output> {
2085 if *self.end() == usize::max_value() {
2088 (*self.start()..self.end() + 1).get(slice)
2092 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
2093 if *self.end() == usize::max_value() {
2096 (*self.start()..self.end() + 1).get_mut(slice)
2100 unsafe fn get_unchecked(self, slice: &str) -> &Self::Output {
2101 (*self.start()..self.end() + 1).get_unchecked(slice)
2104 unsafe fn get_unchecked_mut(self, slice: &mut str) -> &mut Self::Output {
2105 (*self.start()..self.end() + 1).get_unchecked_mut(slice)
2108 fn index(self, slice: &str) -> &Self::Output {
2109 if *self.end() == usize::max_value() {
2110 str_index_overflow_fail();
2112 (*self.start()..self.end() + 1).index(slice)
2115 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
2116 if *self.end() == usize::max_value() {
2117 str_index_overflow_fail();
2119 (*self.start()..self.end() + 1).index_mut(slice)
2123 /// Implements substring slicing with syntax `&self[..= end]` or `&mut
2126 /// Returns a slice of the given string from the byte range [0, `end`].
2127 /// Equivalent to `&self[0 .. end + 1]`, except if `end` has the maximum
2128 /// value for `usize`.
2130 /// This operation is `O(1)`.
2134 /// Panics if `end` does not point to the ending byte offset of a character
2135 /// (`end + 1` is either a starting byte offset as defined by
2136 /// `is_char_boundary`, or equal to `len`), or if `end >= len`.
2137 #[stable(feature = "inclusive_range", since = "1.26.0")]
2138 impl SliceIndex<str> for ops::RangeToInclusive<usize> {
2141 fn get(self, slice: &str) -> Option<&Self::Output> {
2142 if self.end == usize::max_value() { None } else { (..self.end + 1).get(slice) }
2145 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
2146 if self.end == usize::max_value() { None } else { (..self.end + 1).get_mut(slice) }
2149 unsafe fn get_unchecked(self, slice: &str) -> &Self::Output {
2150 (..self.end + 1).get_unchecked(slice)
2153 unsafe fn get_unchecked_mut(self, slice: &mut str) -> &mut Self::Output {
2154 (..self.end + 1).get_unchecked_mut(slice)
2157 fn index(self, slice: &str) -> &Self::Output {
2158 if self.end == usize::max_value() {
2159 str_index_overflow_fail();
2161 (..self.end + 1).index(slice)
2164 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
2165 if self.end == usize::max_value() {
2166 str_index_overflow_fail();
2168 (..self.end + 1).index_mut(slice)
2173 // Truncates `s` to a length of at most `max` bytes, ending on a char boundary.
2174 // Returns `true` if it was truncated, together with the resulting str.
2175 fn truncate_to_char_boundary(s: &str, mut max: usize) -> (bool, &str) {
2179 while !s.is_char_boundary(max) {
// Panics with a descriptive message for a failed `str` slice over `begin`/`end`;
// never returns. The string echoed in the message is cut to at most
// `MAX_DISPLAY_LENGTH` bytes and followed by `[...]` when it was shortened.
// The message distinguishes an out-of-bounds index, a `begin`/`end` ordering
// problem, and an index that falls inside a multi-byte character.
2188 fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
2189 const MAX_DISPLAY_LENGTH: usize = 256;
2190 let (truncated, s_trunc) = truncate_to_char_boundary(s, MAX_DISPLAY_LENGTH);
2191 let ellipsis = if truncated { "[...]" } else { "" };
2194 if begin > s.len() || end > s.len() {
2195 let oob_index = if begin > s.len() { begin } else { end };
2196 panic!("byte index {} is out of bounds of `{}`{}", oob_index, s_trunc, ellipsis);
2202 "begin <= end ({} <= {}) when slicing `{}`{}",
2209 // 3. character boundary
2210 let index = if !s.is_char_boundary(begin) { begin } else { end };
2211 // find the character
2212 let mut char_start = index;
2213 while !s.is_char_boundary(char_start) {
2216 // `char_start` must be less than len and a char boundary
2217 let ch = s[char_start..].chars().next().unwrap();
2218 let char_range = char_start..char_start + ch.len_utf8();
2220 "byte index {} is not a char boundary; it is inside {:?} (bytes {:?}) of `{}`{}",
2221 index, ch, char_range, s_trunc, ellipsis
2228 /// Returns the length of `self`.
2230 /// This length is in bytes, not [`char`]s or graphemes. In other words,
2231 /// it may not be what a human considers the length of the string.
2238 /// let len = "foo".len();
2239 /// assert_eq!(3, len);
2241 /// assert_eq!("ƒoo".len(), 4); // fancy f!
2242 /// assert_eq!("ƒoo".chars().count(), 3);
2244 #[stable(feature = "rust1", since = "1.0.0")]
2245 #[rustc_const_stable(feature = "const_str_len", since = "1.32.0")]
2247 pub const fn len(&self) -> usize {
2248 self.as_bytes().len()
2251 /// Returns `true` if `self` has a length of zero bytes.
2259 /// assert!(s.is_empty());
2261 /// let s = "not empty";
2262 /// assert!(!s.is_empty());
2265 #[stable(feature = "rust1", since = "1.0.0")]
2266 #[rustc_const_stable(feature = "const_str_is_empty", since = "1.32.0")]
2267 pub const fn is_empty(&self) -> bool {
2271 /// Checks that `index`-th byte is the first byte in a UTF-8 code point
2272 /// sequence or the end of the string.
2274 /// The start and end of the string (when `index == self.len()`) are
2275 /// considered to be
2278 /// Returns `false` if `index` is greater than `self.len()`.
2283 /// let s = "Löwe 老虎 Léopard";
2284 /// assert!(s.is_char_boundary(0));
2286 /// assert!(s.is_char_boundary(6));
2287 /// assert!(s.is_char_boundary(s.len()));
2289 /// // second byte of `ö`
2290 /// assert!(!s.is_char_boundary(2));
2292 /// // third byte of `老`
2293 /// assert!(!s.is_char_boundary(8));
2295 #[stable(feature = "is_char_boundary", since = "1.9.0")]
2297 pub fn is_char_boundary(&self, index: usize) -> bool {
2298 // 0 and len are always ok.
2299 // Test for 0 explicitly so that it can optimize out the check
2300 // easily and skip reading string data for that case.
2301 if index == 0 || index == self.len() {
2304 match self.as_bytes().get(index) {
2306 // This is bit magic equivalent to: b < 128 || b >= 192
2307 Some(&b) => (b as i8) >= -0x40,
2311 /// Converts a string slice to a byte slice. To convert the byte slice back
2312 /// into a string slice, use the [`str::from_utf8`] function.
2314 /// [`str::from_utf8`]: ./str/fn.from_utf8.html
2321 /// let bytes = "bors".as_bytes();
2322 /// assert_eq!(b"bors", bytes);
2324 #[stable(feature = "rust1", since = "1.0.0")]
2325 #[rustc_const_stable(feature = "str_as_bytes", since = "1.32.0")]
2327 #[allow(unused_attributes)]
2328 #[allow_internal_unstable(const_fn_union)]
2329 pub const fn as_bytes(&self) -> &[u8] {
2335 // SAFETY: const sound because we transmute two types with the same layout
2336 unsafe { Slices { str: self }.slice }
2339 /// Converts a mutable string slice to a mutable byte slice. To convert the
2340 /// mutable byte slice back into a mutable string slice, use the
2341 /// [`str::from_utf8_mut`] function.
2343 /// [`str::from_utf8_mut`]: ./str/fn.from_utf8_mut.html
2350 /// let mut s = String::from("Hello");
2351 /// let bytes = unsafe { s.as_bytes_mut() };
2353 /// assert_eq!(b"Hello", bytes);
2359 /// let mut s = String::from("🗻∈🌏");
2362 /// let bytes = s.as_bytes_mut();
2364 /// bytes[0] = 0xF0;
2365 /// bytes[1] = 0x9F;
2366 /// bytes[2] = 0x8D;
2367 /// bytes[3] = 0x94;
2370 /// assert_eq!("🍔∈🌏", s);
2372 #[stable(feature = "str_mut_extras", since = "1.20.0")]
2374 pub unsafe fn as_bytes_mut(&mut self) -> &mut [u8] {
2375 &mut *(self as *mut str as *mut [u8])
2378 /// Converts a string slice to a raw pointer.
2380 /// As string slices are a slice of bytes, the raw pointer points to a
2381 /// [`u8`]. This pointer will be pointing to the first byte of the string
2384 /// The caller must ensure that the returned pointer is never written to.
2385 /// If you need to mutate the contents of the string slice, use [`as_mut_ptr`].
2387 /// [`u8`]: primitive.u8.html
2388 /// [`as_mut_ptr`]: #method.as_mut_ptr
2395 /// let s = "Hello";
2396 /// let ptr = s.as_ptr();
2398 #[stable(feature = "rust1", since = "1.0.0")]
2399 #[rustc_const_stable(feature = "rustc_str_as_ptr", since = "1.32.0")]
2401 pub const fn as_ptr(&self) -> *const u8 {
2402 self as *const str as *const u8
2405 /// Converts a mutable string slice to a raw pointer.
2407 /// As string slices are a slice of bytes, the raw pointer points to a
2408 /// [`u8`]. This pointer will be pointing to the first byte of the string
2411 /// It is your responsibility to make sure that the string slice only gets
2412 /// modified in a way that it remains valid UTF-8.
2414 /// [`u8`]: primitive.u8.html
2415 #[stable(feature = "str_as_mut_ptr", since = "1.36.0")]
2417 pub fn as_mut_ptr(&mut self) -> *mut u8 {
2418 self as *mut str as *mut u8
2421 /// Returns a subslice of `str`.
2423 /// This is the non-panicking alternative to indexing the `str`. Returns
2424 /// [`None`] whenever equivalent indexing operation would panic.
2426 /// [`None`]: option/enum.Option.html#variant.None
2431 /// let v = String::from("🗻∈🌏");
2433 /// assert_eq!(Some("🗻"), v.get(0..4));
2435 /// // indices not on UTF-8 sequence boundaries
2436 /// assert!(v.get(1..).is_none());
2437 /// assert!(v.get(..8).is_none());
2439 /// // out of bounds
2440 /// assert!(v.get(..42).is_none());
2442 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
2444 pub fn get<I: SliceIndex<str>>(&self, i: I) -> Option<&I::Output> {
2448 /// Returns a mutable subslice of `str`.
2450 /// This is the non-panicking alternative to indexing the `str`. Returns
2451 /// [`None`] whenever equivalent indexing operation would panic.
2453 /// [`None`]: option/enum.Option.html#variant.None
2458 /// let mut v = String::from("hello");
2459 /// // correct length
2460 /// assert!(v.get_mut(0..5).is_some());
2461 /// // out of bounds
2462 /// assert!(v.get_mut(..42).is_none());
2463 /// assert_eq!(Some("he"), v.get_mut(0..2).map(|v| &*v));
2465 /// assert_eq!("hello", v);
2467 /// let s = v.get_mut(0..2);
2468 /// let s = s.map(|s| {
2469 /// s.make_ascii_uppercase();
2472 /// assert_eq!(Some("HE"), s);
2474 /// assert_eq!("HEllo", v);
2476 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
2478 pub fn get_mut<I: SliceIndex<str>>(&mut self, i: I) -> Option<&mut I::Output> {
2482 /// Returns an unchecked subslice of `str`.
2484 /// This is the unchecked alternative to indexing the `str`.
2488 /// Callers of this function are responsible that these preconditions are
2491 /// * The starting index must not exceed the ending index;
2492 /// * Indexes must be within bounds of the original slice;
2493 /// * Indexes must lie on UTF-8 sequence boundaries.
2495 /// Failing that, the returned string slice may reference invalid memory or
2496 /// violate the invariants communicated by the `str` type.
2503 /// assert_eq!("🗻", v.get_unchecked(0..4));
2504 /// assert_eq!("∈", v.get_unchecked(4..7));
2505 /// assert_eq!("🌏", v.get_unchecked(7..11));
2508 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
2510 pub unsafe fn get_unchecked<I: SliceIndex<str>>(&self, i: I) -> &I::Output {
2511 i.get_unchecked(self)
2514 /// Returns a mutable, unchecked subslice of `str`.
2516 /// This is the unchecked alternative to indexing the `str`.
2520 /// Callers of this function are responsible that these preconditions are
2523 /// * The starting index must not exceed the ending index;
2524 /// * Indexes must be within bounds of the original slice;
2525 /// * Indexes must lie on UTF-8 sequence boundaries.
2527 /// Failing that, the returned string slice may reference invalid memory or
2528 /// violate the invariants communicated by the `str` type.
2533 /// let mut v = String::from("🗻∈🌏");
2535 /// assert_eq!("🗻", v.get_unchecked_mut(0..4));
2536 /// assert_eq!("∈", v.get_unchecked_mut(4..7));
2537 /// assert_eq!("🌏", v.get_unchecked_mut(7..11));
2540 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
2542 pub unsafe fn get_unchecked_mut<I: SliceIndex<str>>(&mut self, i: I) -> &mut I::Output {
2543 i.get_unchecked_mut(self)
2546 /// Creates a string slice from another string slice, bypassing safety
2549 /// This is generally not recommended, use with caution! For a safe
2550 /// alternative see [`str`] and [`Index`].
2552 /// [`str`]: primitive.str.html
2553 /// [`Index`]: ops/trait.Index.html
2555 /// This new slice goes from `begin` to `end`, including `begin` but
2556 /// excluding `end`.
2558 /// To get a mutable string slice instead, see the
2559 /// [`slice_mut_unchecked`] method.
2561 /// [`slice_mut_unchecked`]: #method.slice_mut_unchecked
2565 /// Callers of this function are responsible that three preconditions are
2568 /// * `begin` must not exceed `end`.
2569 /// * `begin` and `end` must be byte positions within the string slice.
2570 /// * `begin` and `end` must lie on UTF-8 sequence boundaries.
2577 /// let s = "Löwe 老虎 Léopard";
2580 /// assert_eq!("Löwe 老虎 Léopard", s.slice_unchecked(0, 21));
2583 /// let s = "Hello, world!";
2586 /// assert_eq!("world", s.slice_unchecked(7, 12));
2589 #[stable(feature = "rust1", since = "1.0.0")]
2590 #[rustc_deprecated(since = "1.29.0", reason = "use `get_unchecked(begin..end)` instead")]
2592 pub unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str {
2593 (begin..end).get_unchecked(self)
2596 /// Creates a string slice from another string slice, bypassing safety
2598 /// This is generally not recommended, use with caution! For a safe
2599 /// alternative see [`str`] and [`IndexMut`].
2601 /// [`str`]: primitive.str.html
2602 /// [`IndexMut`]: ops/trait.IndexMut.html
2604 /// This new slice goes from `begin` to `end`, including `begin` but
2605 /// excluding `end`.
2607 /// To get an immutable string slice instead, see the
2608 /// [`slice_unchecked`] method.
2610 /// [`slice_unchecked`]: #method.slice_unchecked
2614 /// Callers of this function are responsible that three preconditions are
2617 /// * `begin` must not exceed `end`.
2618 /// * `begin` and `end` must be byte positions within the string slice.
2619 /// * `begin` and `end` must lie on UTF-8 sequence boundaries.
2620 #[stable(feature = "str_slice_mut", since = "1.5.0")]
2621 #[rustc_deprecated(since = "1.29.0", reason = "use `get_unchecked_mut(begin..end)` instead")]
2623 pub unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str {
2624 (begin..end).get_unchecked_mut(self)
2627 /// Divide one string slice into two at an index.
2629 /// The argument, `mid`, should be a byte offset from the start of the
2630 /// string. It must also be on the boundary of a UTF-8 code point.
2632 /// The two slices returned go from the start of the string slice to `mid`,
2633 /// and from `mid` to the end of the string slice.
2635 /// To get mutable string slices instead, see the [`split_at_mut`]
2638 /// [`split_at_mut`]: #method.split_at_mut
2642 /// Panics if `mid` is not on a UTF-8 code point boundary, or if it is
2643 /// beyond the last code point of the string slice.
2650 /// let s = "Per Martin-Löf";
2652 /// let (first, last) = s.split_at(3);
2654 /// assert_eq!("Per", first);
2655 /// assert_eq!(" Martin-Löf", last);
2658 #[stable(feature = "str_split_at", since = "1.4.0")]
2659 pub fn split_at(&self, mid: usize) -> (&str, &str) {
2660 // is_char_boundary checks that the index is in [0, .len()]
2661 if self.is_char_boundary(mid) {
2662 // SAFETY: just checked that `mid` is on a char boundary.
2663 unsafe { (self.get_unchecked(0..mid), self.get_unchecked(mid..self.len())) }
2665 slice_error_fail(self, 0, mid)
2669 /// Divide one mutable string slice into two at an index.
2671 /// The argument, `mid`, should be a byte offset from the start of the
2672 /// string. It must also be on the boundary of a UTF-8 code point.
2674 /// The two slices returned go from the start of the string slice to `mid`,
2675 /// and from `mid` to the end of the string slice.
2677 /// To get immutable string slices instead, see the [`split_at`] method.
2679 /// [`split_at`]: #method.split_at
2683 /// Panics if `mid` is not on a UTF-8 code point boundary, or if it is
2684 /// beyond the last code point of the string slice.
2691 /// let mut s = "Per Martin-Löf".to_string();
2693 /// let (first, last) = s.split_at_mut(3);
2694 /// first.make_ascii_uppercase();
2695 /// assert_eq!("PER", first);
2696 /// assert_eq!(" Martin-Löf", last);
2698 /// assert_eq!("PER Martin-Löf", s);
2701 #[stable(feature = "str_split_at", since = "1.4.0")]
2702 pub fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str) {
2703 // is_char_boundary checks that the index is in [0, .len()]
2704 if self.is_char_boundary(mid) {
2705 let len = self.len();
2706 let ptr = self.as_mut_ptr();
2707 // SAFETY: just checked that `mid` is on a char boundary.
2710 from_utf8_unchecked_mut(slice::from_raw_parts_mut(ptr, mid)),
2711 from_utf8_unchecked_mut(slice::from_raw_parts_mut(ptr.add(mid), len - mid)),
2715 slice_error_fail(self, 0, mid)
2719 /// Returns an iterator over the [`char`]s of a string slice.
2721 /// As a string slice consists of valid UTF-8, we can iterate through a
2722 /// string slice by [`char`]. This method returns such an iterator.
2724 /// It's important to remember that [`char`] represents a Unicode Scalar
2725 /// Value, and may not match your idea of what a 'character' is. Iteration
2726 /// over grapheme clusters may be what you actually want. This functionality
2727 /// is not provided by Rust's standard library, check crates.io instead.
2734 /// let word = "goodbye";
2736 /// let count = word.chars().count();
2737 /// assert_eq!(7, count);
2739 /// let mut chars = word.chars();
2741 /// assert_eq!(Some('g'), chars.next());
2742 /// assert_eq!(Some('o'), chars.next());
2743 /// assert_eq!(Some('o'), chars.next());
2744 /// assert_eq!(Some('d'), chars.next());
2745 /// assert_eq!(Some('b'), chars.next());
2746 /// assert_eq!(Some('y'), chars.next());
2747 /// assert_eq!(Some('e'), chars.next());
2749 /// assert_eq!(None, chars.next());
2752 /// Remember, [`char`]s may not match your human intuition about characters:
2757 /// let mut chars = y.chars();
2759 /// assert_eq!(Some('y'), chars.next()); // not 'y̆'
2760 /// assert_eq!(Some('\u{0306}'), chars.next());
2762 /// assert_eq!(None, chars.next());
2764 #[stable(feature = "rust1", since = "1.0.0")]
2766 pub fn chars(&self) -> Chars<'_> {
2767 Chars { iter: self.as_bytes().iter() }
2770 /// Returns an iterator over the [`char`]s of a string slice, and their
2773 /// As a string slice consists of valid UTF-8, we can iterate through a
2774 /// string slice by [`char`]. This method returns an iterator of both
2775 /// these [`char`]s, as well as their byte positions.
2777 /// The iterator yields tuples. The position is first, the [`char`] is
2785 /// let word = "goodbye";
2787 /// let count = word.char_indices().count();
2788 /// assert_eq!(7, count);
2790 /// let mut char_indices = word.char_indices();
2792 /// assert_eq!(Some((0, 'g')), char_indices.next());
2793 /// assert_eq!(Some((1, 'o')), char_indices.next());
2794 /// assert_eq!(Some((2, 'o')), char_indices.next());
2795 /// assert_eq!(Some((3, 'd')), char_indices.next());
2796 /// assert_eq!(Some((4, 'b')), char_indices.next());
2797 /// assert_eq!(Some((5, 'y')), char_indices.next());
2798 /// assert_eq!(Some((6, 'e')), char_indices.next());
2800 /// assert_eq!(None, char_indices.next());
2803 /// Remember, [`char`]s may not match your human intuition about characters:
2806 /// let yes = "y̆es";
2808 /// let mut char_indices = yes.char_indices();
2810 /// assert_eq!(Some((0, 'y')), char_indices.next()); // not (0, 'y̆')
2811 /// assert_eq!(Some((1, '\u{0306}')), char_indices.next());
2813 /// // note the 3 here - the last character took up two bytes
2814 /// assert_eq!(Some((3, 'e')), char_indices.next());
2815 /// assert_eq!(Some((4, 's')), char_indices.next());
2817 /// assert_eq!(None, char_indices.next());
2819 #[stable(feature = "rust1", since = "1.0.0")]
2821 pub fn char_indices(&self) -> CharIndices<'_> {
2822 CharIndices { front_offset: 0, iter: self.chars() }
2825 /// An iterator over the bytes of a string slice.
2827 /// As a string slice consists of a sequence of bytes, we can iterate
2828 /// through a string slice by byte. This method returns such an iterator.
2835 /// let mut bytes = "bors".bytes();
2837 /// assert_eq!(Some(b'b'), bytes.next());
2838 /// assert_eq!(Some(b'o'), bytes.next());
2839 /// assert_eq!(Some(b'r'), bytes.next());
2840 /// assert_eq!(Some(b's'), bytes.next());
2842 /// assert_eq!(None, bytes.next());
2844 #[stable(feature = "rust1", since = "1.0.0")]
2846 pub fn bytes(&self) -> Bytes<'_> {
2847 Bytes(self.as_bytes().iter().copied())
2850 /// Splits a string slice by whitespace.
2852 /// The iterator returned will return string slices that are sub-slices of
2853 /// the original string slice, separated by any amount of whitespace.
2855 /// 'Whitespace' is defined according to the terms of the Unicode Derived
2856 /// Core Property `White_Space`. If you only want to split on ASCII whitespace
2857 /// instead, use [`split_ascii_whitespace`].
2859 /// [`split_ascii_whitespace`]: #method.split_ascii_whitespace
2866 /// let mut iter = "A few words".split_whitespace();
2868 /// assert_eq!(Some("A"), iter.next());
2869 /// assert_eq!(Some("few"), iter.next());
2870 /// assert_eq!(Some("words"), iter.next());
2872 /// assert_eq!(None, iter.next());
2875 /// All kinds of whitespace are considered:
2878 /// let mut iter = " Mary had\ta\u{2009}little \n\t lamb".split_whitespace();
2879 /// assert_eq!(Some("Mary"), iter.next());
2880 /// assert_eq!(Some("had"), iter.next());
2881 /// assert_eq!(Some("a"), iter.next());
2882 /// assert_eq!(Some("little"), iter.next());
2883 /// assert_eq!(Some("lamb"), iter.next());
2885 /// assert_eq!(None, iter.next());
2887 #[stable(feature = "split_whitespace", since = "1.1.0")]
2889 pub fn split_whitespace(&self) -> SplitWhitespace<'_> {
2890 SplitWhitespace { inner: self.split(IsWhitespace).filter(IsNotEmpty) }
2893 /// Splits a string slice by ASCII whitespace.
2895 /// The iterator returned will return string slices that are sub-slices of
2896 /// the original string slice, separated by any amount of ASCII whitespace.
2898 /// To split by Unicode `Whitespace` instead, use [`split_whitespace`].
2900 /// [`split_whitespace`]: #method.split_whitespace
2907 /// let mut iter = "A few words".split_ascii_whitespace();
2909 /// assert_eq!(Some("A"), iter.next());
2910 /// assert_eq!(Some("few"), iter.next());
2911 /// assert_eq!(Some("words"), iter.next());
2913 /// assert_eq!(None, iter.next());
2916 /// All kinds of ASCII whitespace are considered:
2919 /// let mut iter = " Mary had\ta little \n\t lamb".split_ascii_whitespace();
2920 /// assert_eq!(Some("Mary"), iter.next());
2921 /// assert_eq!(Some("had"), iter.next());
2922 /// assert_eq!(Some("a"), iter.next());
2923 /// assert_eq!(Some("little"), iter.next());
2924 /// assert_eq!(Some("lamb"), iter.next());
2926 /// assert_eq!(None, iter.next());
2928 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
2930 pub fn split_ascii_whitespace(&self) -> SplitAsciiWhitespace<'_> {
2932 self.as_bytes().split(IsAsciiWhitespace).filter(BytesIsNotEmpty).map(UnsafeBytesToStr);
2933 SplitAsciiWhitespace { inner }
2936 /// An iterator over the lines of a string, as string slices.
2938 /// Lines are ended with either a newline (`\n`) or a carriage return with
2939 /// a line feed (`\r\n`).
2941 /// The final line ending is optional.
2948 /// let text = "foo\r\nbar\n\nbaz\n";
2949 /// let mut lines = text.lines();
2951 /// assert_eq!(Some("foo"), lines.next());
2952 /// assert_eq!(Some("bar"), lines.next());
2953 /// assert_eq!(Some(""), lines.next());
2954 /// assert_eq!(Some("baz"), lines.next());
2956 /// assert_eq!(None, lines.next());
2959 /// The final line ending isn't required:
2962 /// let text = "foo\nbar\n\r\nbaz";
2963 /// let mut lines = text.lines();
2965 /// assert_eq!(Some("foo"), lines.next());
2966 /// assert_eq!(Some("bar"), lines.next());
2967 /// assert_eq!(Some(""), lines.next());
2968 /// assert_eq!(Some("baz"), lines.next());
2970 /// assert_eq!(None, lines.next());
2972 #[stable(feature = "rust1", since = "1.0.0")]
2974 pub fn lines(&self) -> Lines<'_> {
2975 Lines(self.split_terminator('\n').map(LinesAnyMap))
2978 /// An iterator over the lines of a string.
2979 #[stable(feature = "rust1", since = "1.0.0")]
2980 #[rustc_deprecated(since = "1.4.0", reason = "use lines() instead now")]
2982 #[allow(deprecated)]
2983 pub fn lines_any(&self) -> LinesAny<'_> {
2984 LinesAny(self.lines())
2987 /// Returns an iterator of `u16` over the string encoded as UTF-16.
2994 /// let text = "Zażółć gęślą jaźń";
2996 /// let utf8_len = text.len();
2997 /// let utf16_len = text.encode_utf16().count();
2999 /// assert!(utf16_len <= utf8_len);
3001 #[stable(feature = "encode_utf16", since = "1.8.0")]
3002 pub fn encode_utf16(&self) -> EncodeUtf16<'_> {
3003 EncodeUtf16 { chars: self.chars(), extra: 0 }
3006 /// Returns `true` if the given pattern matches a sub-slice of
3007 /// this string slice.
3009 /// Returns `false` if it does not.
3016 /// let bananas = "bananas";
3018 /// assert!(bananas.contains("nana"));
3019 /// assert!(!bananas.contains("apples"));
3021 #[stable(feature = "rust1", since = "1.0.0")]
3023 pub fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
3024 pat.is_contained_in(self)
3027 /// Returns `true` if the given pattern matches a prefix of this
3030 /// Returns `false` if it does not.
3037 /// let bananas = "bananas";
3039 /// assert!(bananas.starts_with("bana"));
3040 /// assert!(!bananas.starts_with("nana"));
3042 #[stable(feature = "rust1", since = "1.0.0")]
3043 pub fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
3044 pat.is_prefix_of(self)
3047 /// Returns `true` if the given pattern matches a suffix of this
3050 /// Returns `false` if it does not.
3057 /// let bananas = "bananas";
3059 /// assert!(bananas.ends_with("anas"));
3060 /// assert!(!bananas.ends_with("nana"));
3062 #[stable(feature = "rust1", since = "1.0.0")]
3063 pub fn ends_with<'a, P>(&'a self, pat: P) -> bool
3065 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
3067 pat.is_suffix_of(self)
3070 /// Returns the byte index of the first character of this string slice that
3071 /// matches the pattern.
3073 /// Returns [`None`] if the pattern doesn't match.
3075 /// The pattern can be a `&str`, [`char`], or a closure that determines if
3076 /// a character matches.
3078 /// [`None`]: option/enum.Option.html#variant.None
3082 /// Simple patterns:
3085 /// let s = "Löwe 老虎 Léopard";
3087 /// assert_eq!(s.find('L'), Some(0));
3088 /// assert_eq!(s.find('é'), Some(14));
3089 /// assert_eq!(s.find("Léopard"), Some(13));
3092 /// More complex patterns using point-free style and closures:
3095 /// let s = "Löwe 老虎 Léopard";
3097 /// assert_eq!(s.find(char::is_whitespace), Some(5));
3098 /// assert_eq!(s.find(char::is_lowercase), Some(1));
3099 /// assert_eq!(s.find(|c: char| c.is_whitespace() || c.is_lowercase()), Some(1));
3100 /// assert_eq!(s.find(|c: char| (c < 'o') && (c > 'a')), Some(4));
3103 /// Not finding the pattern:
3106 /// let s = "Löwe 老虎 Léopard";
3107 /// let x: &[_] = &['1', '2'];
3109 /// assert_eq!(s.find(x), None);
3111 #[stable(feature = "rust1", since = "1.0.0")]
3113 pub fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
3114 pat.into_searcher(self).next_match().map(|(i, _)| i)
3117 /// Returns the byte index of the last character of this string slice that
3118 /// matches the pattern.
3120 /// Returns [`None`] if the pattern doesn't match.
3122 /// The pattern can be a `&str`, [`char`], or a closure that determines if
3123 /// a character matches.
3125 /// [`None`]: option/enum.Option.html#variant.None
3129 /// Simple patterns:
3132 /// let s = "Löwe 老虎 Léopard";
3134 /// assert_eq!(s.rfind('L'), Some(13));
3135 /// assert_eq!(s.rfind('é'), Some(14));
3138 /// More complex patterns with closures:
3141 /// let s = "Löwe 老虎 Léopard";
3143 /// assert_eq!(s.rfind(char::is_whitespace), Some(12));
3144 /// assert_eq!(s.rfind(char::is_lowercase), Some(20));
3147 /// Not finding the pattern:
3150 /// let s = "Löwe 老虎 Léopard";
3151 /// let x: &[_] = &['1', '2'];
3153 /// assert_eq!(s.rfind(x), None);
3155 #[stable(feature = "rust1", since = "1.0.0")]
3157 pub fn rfind<'a, P>(&'a self, pat: P) -> Option<usize>
3159 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
3161 pat.into_searcher(self).next_match_back().map(|(i, _)| i)
3164 /// An iterator over substrings of this string slice, separated by
3165 /// characters matched by a pattern.
3167 /// The pattern can be any type that implements the Pattern trait. Notable
3168 /// examples are `&str`, [`char`], and closures that determines the split.
3170 /// # Iterator behavior
3172 /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
3173 /// allows a reverse search and forward/reverse search yields the same
3174 /// elements. This is true for, e.g., [`char`], but not for `&str`.
3176 /// [`DoubleEndedIterator`]: iter/trait.DoubleEndedIterator.html
3178 /// If the pattern allows a reverse search but its results might differ
3179 /// from a forward search, the [`rsplit`] method can be used.
3181 /// [`rsplit`]: #method.rsplit
3185 /// Simple patterns:
3188 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect();
3189 /// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]);
3191 /// let v: Vec<&str> = "".split('X').collect();
3192 /// assert_eq!(v, [""]);
3194 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
3195 /// assert_eq!(v, ["lion", "", "tiger", "leopard"]);
3197 /// let v: Vec<&str> = "lion::tiger::leopard".split("::").collect();
3198 /// assert_eq!(v, ["lion", "tiger", "leopard"]);
3200 /// let v: Vec<&str> = "abc1def2ghi".split(char::is_numeric).collect();
3201 /// assert_eq!(v, ["abc", "def", "ghi"]);
3203 /// let v: Vec<&str> = "lionXtigerXleopard".split(char::is_uppercase).collect();
3204 /// assert_eq!(v, ["lion", "tiger", "leopard"]);
3207 /// A more complex pattern, using a closure:
3210 /// let v: Vec<&str> = "abc1defXghi".split(|c| c == '1' || c == 'X').collect();
3211 /// assert_eq!(v, ["abc", "def", "ghi"]);
3214 /// If a string contains multiple contiguous separators, you will end up
3215 /// with empty strings in the output:
3218 /// let x = "||||a||b|c".to_string();
3219 /// let d: Vec<_> = x.split('|').collect();
3221 /// assert_eq!(d, &["", "", "", "", "a", "", "b", "c"]);
3224 /// Contiguous separators are separated by the empty string.
3227 /// let x = "(///)".to_string();
3228 /// let d: Vec<_> = x.split('/').collect();
3230 /// assert_eq!(d, &["(", "", "", ")"]);
3233 /// Separators at the start or end of a string are neighbored
3234 /// by empty strings.
3237 /// let d: Vec<_> = "010".split("0").collect();
3238 /// assert_eq!(d, &["", "1", ""]);
3241 /// When the empty string is used as a separator, it separates
3242 /// every character in the string, along with the beginning
3243 /// and end of the string.
3246 /// let f: Vec<_> = "rust".split("").collect();
3247 /// assert_eq!(f, &["", "r", "u", "s", "t", ""]);
3250 /// Contiguous separators can lead to possibly surprising behavior
3251 /// when whitespace is used as the separator. This code is correct:
3254 /// let x = " a b c".to_string();
3255 /// let d: Vec<_> = x.split(' ').collect();
3257 /// assert_eq!(d, &["", "", "", "", "a", "", "b", "c"]);
3260 /// It does _not_ give you:
3263 /// assert_eq!(d, &["a", "b", "c"]);
3266 /// Use [`split_whitespace`] for this behavior.
3268 /// [`split_whitespace`]: #method.split_whitespace
3269 #[stable(feature = "rust1", since = "1.0.0")]
3271 pub fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> {
3272 Split(SplitInternal {
3275 matcher: pat.into_searcher(self),
3276 allow_trailing_empty: true,
3281 /// An iterator over substrings of this string slice, separated by
3282 /// characters matched by a pattern. Differs from the iterator produced by
3283 /// `split` in that `split_inclusive` leaves the matched part as the
3284 /// terminator of the substring.
3289 /// #![feature(split_inclusive)]
3290 /// let v: Vec<&str> = "Mary had a little lamb\nlittle lamb\nlittle lamb."
3291 /// .split_inclusive('\n').collect();
3292 /// assert_eq!(v, ["Mary had a little lamb\n", "little lamb\n", "little lamb."]);
3295 /// If the last element of the string is matched,
3296 /// that element will be considered the terminator of the preceding substring.
3297 /// That substring will be the last item returned by the iterator.
3300 /// #![feature(split_inclusive)]
3301 /// let v: Vec<&str> = "Mary had a little lamb\nlittle lamb\nlittle lamb.\n"
3302 /// .split_inclusive('\n').collect();
3303 /// assert_eq!(v, ["Mary had a little lamb\n", "little lamb\n", "little lamb.\n"]);
3305 #[unstable(feature = "split_inclusive", issue = "none")]
3307 pub fn split_inclusive<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitInclusive<'a, P> {
3308 SplitInclusive(SplitInternal {
3311 matcher: pat.into_searcher(self),
3312 allow_trailing_empty: false,
3317 /// An iterator over substrings of the given string slice, separated by
3318 /// characters matched by a pattern and yielded in reverse order.
3320 /// The pattern can be any type that implements the Pattern trait. Notable
3321 /// examples are `&str`, [`char`], and closures that determines the split.
3323 /// # Iterator behavior
3325 /// The returned iterator requires that the pattern supports a reverse
3326 /// search, and it will be a [`DoubleEndedIterator`] if a forward/reverse
3327 /// search yields the same elements.
3329 /// [`DoubleEndedIterator`]: iter/trait.DoubleEndedIterator.html
3331 /// For iterating from the front, the [`split`] method can be used.
3333 /// [`split`]: #method.split
3337 /// Simple patterns:
3340 /// let v: Vec<&str> = "Mary had a little lamb".rsplit(' ').collect();
3341 /// assert_eq!(v, ["lamb", "little", "a", "had", "Mary"]);
3343 /// let v: Vec<&str> = "".rsplit('X').collect();
3344 /// assert_eq!(v, [""]);
3346 /// let v: Vec<&str> = "lionXXtigerXleopard".rsplit('X').collect();
3347 /// assert_eq!(v, ["leopard", "tiger", "", "lion"]);
3349 /// let v: Vec<&str> = "lion::tiger::leopard".rsplit("::").collect();
3350 /// assert_eq!(v, ["leopard", "tiger", "lion"]);
3353 /// A more complex pattern, using a closure:
3356 /// let v: Vec<&str> = "abc1defXghi".rsplit(|c| c == '1' || c == 'X').collect();
3357 /// assert_eq!(v, ["ghi", "def", "abc"]);
3359 #[stable(feature = "rust1", since = "1.0.0")]
3361 pub fn rsplit<'a, P>(&'a self, pat: P) -> RSplit<'a, P>
3363 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
3365 RSplit(self.split(pat).0)
3368 /// An iterator over substrings of the given string slice, separated by
3369 /// characters matched by a pattern.
3371 /// The pattern can be any type that implements the Pattern trait. Notable
3372 /// examples are `&str`, [`char`], and closures that determines the split.
3374 /// Equivalent to [`split`], except that the trailing substring
3375 /// is skipped if empty.
3377 /// [`split`]: #method.split
3379 /// This method can be used for string data that is _terminated_,
3380 /// rather than _separated_ by a pattern.
3382 /// # Iterator behavior
3384 /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
3385 /// allows a reverse search and forward/reverse search yields the same
3386 /// elements. This is true for, e.g., [`char`], but not for `&str`.
3388 /// [`DoubleEndedIterator`]: iter/trait.DoubleEndedIterator.html
3390 /// If the pattern allows a reverse search but its results might differ
3391 /// from a forward search, the [`rsplit_terminator`] method can be used.
3393 /// [`rsplit_terminator`]: #method.rsplit_terminator
3400 /// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
3401 /// assert_eq!(v, ["A", "B"]);
3403 /// let v: Vec<&str> = "A..B..".split_terminator(".").collect();
3404 /// assert_eq!(v, ["A", "", "B", ""]);
3406 #[stable(feature = "rust1", since = "1.0.0")]
3408 pub fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> {
3409 SplitTerminator(SplitInternal { allow_trailing_empty: false, ..self.split(pat).0 })
3412 /// An iterator over substrings of `self`, separated by characters
3413 /// matched by a pattern and yielded in reverse order.
3415 /// The pattern can be any type that implements the Pattern trait. Notable
3416 /// examples are `&str`, [`char`], and closures that determines the split.
3417 /// Additional libraries might provide more complex patterns like
3418 /// regular expressions.
3420 /// Equivalent to [`split`], except that the trailing substring is
3421 /// skipped if empty.
3423 /// [`split`]: #method.split
3425 /// This method can be used for string data that is _terminated_,
3426 /// rather than _separated_ by a pattern.
3428 /// # Iterator behavior
3430 /// The returned iterator requires that the pattern supports a
3431 /// reverse search, and it will be double ended if a forward/reverse
3432 /// search yields the same elements.
3434 /// For iterating from the front, the [`split_terminator`] method can be
3437 /// [`split_terminator`]: #method.split_terminator
3442 /// let v: Vec<&str> = "A.B.".rsplit_terminator('.').collect();
3443 /// assert_eq!(v, ["B", "A"]);
3445 /// let v: Vec<&str> = "A..B..".rsplit_terminator(".").collect();
3446 /// assert_eq!(v, ["", "B", "", "A"]);
3448 #[stable(feature = "rust1", since = "1.0.0")]
3450 pub fn rsplit_terminator<'a, P>(&'a self, pat: P) -> RSplitTerminator<'a, P>
3452 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
3454 RSplitTerminator(self.split_terminator(pat).0)
3457 /// An iterator over substrings of the given string slice, separated by a
3458 /// pattern, restricted to returning at most `n` items.
3460 /// If `n` substrings are returned, the last substring (the `n`th substring)
3461 /// will contain the remainder of the string.
3463 /// The pattern can be any type that implements the Pattern trait. Notable
3464 /// examples are `&str`, [`char`], and closures that determines the split.
3466 /// # Iterator behavior
3468 /// The returned iterator will not be double ended, because it is
3469 /// not efficient to support.
3471 /// If the pattern allows a reverse search, the [`rsplitn`] method can be
3474 /// [`rsplitn`]: #method.rsplitn
3478 /// Simple patterns:
3481 /// let v: Vec<&str> = "Mary had a little lambda".splitn(3, ' ').collect();
3482 /// assert_eq!(v, ["Mary", "had", "a little lambda"]);
3484 /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(3, "X").collect();
3485 /// assert_eq!(v, ["lion", "", "tigerXleopard"]);
3487 /// let v: Vec<&str> = "abcXdef".splitn(1, 'X').collect();
3488 /// assert_eq!(v, ["abcXdef"]);
3490 /// let v: Vec<&str> = "".splitn(1, 'X').collect();
3491 /// assert_eq!(v, [""]);
3494 /// A more complex pattern, using a closure:
3497 /// let v: Vec<&str> = "abc1defXghi".splitn(2, |c| c == '1' || c == 'X').collect();
3498 /// assert_eq!(v, ["abc", "defXghi"]);
3500 #[stable(feature = "rust1", since = "1.0.0")]
3502 pub fn splitn<'a, P: Pattern<'a>>(&'a self, n: usize, pat: P) -> SplitN<'a, P> {
3503 SplitN(SplitNInternal { iter: self.split(pat).0, count: n })
3506 /// An iterator over substrings of this string slice, separated by a
3507 /// pattern, starting from the end of the string, restricted to returning
3508 /// at most `n` items.
3510 /// If `n` substrings are returned, the last substring (the `n`th substring)
3511 /// will contain the remainder of the string.
3513 /// The pattern can be any type that implements the Pattern trait. Notable
3514 /// examples are `&str`, [`char`], and closures that determines the split.
3516 /// # Iterator behavior
3518 /// The returned iterator will not be double ended, because it is not
3519 /// efficient to support.
3521 /// For splitting from the front, the [`splitn`] method can be used.
3523 /// [`splitn`]: #method.splitn
3527 /// Simple patterns:
3530 /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(3, ' ').collect();
3531 /// assert_eq!(v, ["lamb", "little", "Mary had a"]);
3533 /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(3, 'X').collect();
3534 /// assert_eq!(v, ["leopard", "tiger", "lionX"]);
3536 /// let v: Vec<&str> = "lion::tiger::leopard".rsplitn(2, "::").collect();
3537 /// assert_eq!(v, ["leopard", "lion::tiger"]);
3540 /// A more complex pattern, using a closure:
3543 /// let v: Vec<&str> = "abc1defXghi".rsplitn(2, |c| c == '1' || c == 'X').collect();
3544 /// assert_eq!(v, ["ghi", "abc1def"]);
3546 #[stable(feature = "rust1", since = "1.0.0")]
3548 pub fn rsplitn<'a, P>(&'a self, n: usize, pat: P) -> RSplitN<'a, P>
3550 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
3552 RSplitN(self.splitn(n, pat).0)
3555 /// An iterator over the disjoint matches of a pattern within the given string
3558 /// The pattern can be a `&str`, [`char`], or a closure that determines if
3559 /// a character matches.
3561 /// # Iterator behavior
3563 /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
3564 /// allows a reverse search and forward/reverse search yields the same
3565 /// elements. This is true for, e.g., [`char`], but not for `&str`.
3567 /// [`DoubleEndedIterator`]: iter/trait.DoubleEndedIterator.html
3569 /// If the pattern allows a reverse search but its results might differ
3570 /// from a forward search, the [`rmatches`] method can be used.
3572 /// [`rmatches`]: #method.rmatches
3579 /// let v: Vec<&str> = "abcXXXabcYYYabc".matches("abc").collect();
3580 /// assert_eq!(v, ["abc", "abc", "abc"]);
3582 /// let v: Vec<&str> = "1abc2abc3".matches(char::is_numeric).collect();
3583 /// assert_eq!(v, ["1", "2", "3"]);
3585 #[stable(feature = "str_matches", since = "1.2.0")]
3587 pub fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> {
3588 Matches(MatchesInternal(pat.into_searcher(self)))
3591 /// An iterator over the disjoint matches of a pattern within this string slice,
3592 /// yielded in reverse order.
3594 /// The pattern can be a `&str`, [`char`], or a closure that determines if
3595 /// a character matches.
3597 /// # Iterator behavior
3599 /// The returned iterator requires that the pattern supports a reverse
3600 /// search, and it will be a [`DoubleEndedIterator`] if a forward/reverse
3601 /// search yields the same elements.
3603 /// [`DoubleEndedIterator`]: iter/trait.DoubleEndedIterator.html
3605 /// For iterating from the front, the [`matches`] method can be used.
3607 /// [`matches`]: #method.matches
3614 /// let v: Vec<&str> = "abcXXXabcYYYabc".rmatches("abc").collect();
3615 /// assert_eq!(v, ["abc", "abc", "abc"]);
3617 /// let v: Vec<&str> = "1abc2abc3".rmatches(char::is_numeric).collect();
3618 /// assert_eq!(v, ["3", "2", "1"]);
3620 #[stable(feature = "str_matches", since = "1.2.0")]
3622 pub fn rmatches<'a, P>(&'a self, pat: P) -> RMatches<'a, P>
3624 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
3626 RMatches(self.matches(pat).0)
3629 /// An iterator over the disjoint matches of a pattern within this string
3630 /// slice as well as the index that the match starts at.
3632 /// For matches of `pat` within `self` that overlap, only the indices
3633 /// corresponding to the first match are returned.
3635 /// The pattern can be a `&str`, [`char`], or a closure that determines
3636 /// if a character matches.
3638 /// # Iterator behavior
3640 /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
3641 /// allows a reverse search and forward/reverse search yields the same
3642 /// elements. This is true for, e.g., [`char`], but not for `&str`.
3644 /// [`DoubleEndedIterator`]: iter/trait.DoubleEndedIterator.html
3646 /// If the pattern allows a reverse search but its results might differ
3647 /// from a forward search, the [`rmatch_indices`] method can be used.
3649 /// [`rmatch_indices`]: #method.rmatch_indices
3656 /// let v: Vec<_> = "abcXXXabcYYYabc".match_indices("abc").collect();
3657 /// assert_eq!(v, [(0, "abc"), (6, "abc"), (12, "abc")]);
3659 /// let v: Vec<_> = "1abcabc2".match_indices("abc").collect();
3660 /// assert_eq!(v, [(1, "abc"), (4, "abc")]);
3662 /// let v: Vec<_> = "ababa".match_indices("aba").collect();
3663 /// assert_eq!(v, [(0, "aba")]); // only the first `aba`
3665 #[stable(feature = "str_match_indices", since = "1.5.0")]
3667 pub fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> {
3668 MatchIndices(MatchIndicesInternal(pat.into_searcher(self)))
3671 /// An iterator over the disjoint matches of a pattern within `self`,
3672 /// yielded in reverse order along with the index of the match.
3674 /// For matches of `pat` within `self` that overlap, only the indices
3675 /// corresponding to the last match are returned.
3677 /// The pattern can be a `&str`, [`char`], or a closure that determines if a
3678 /// character matches.
3680 /// # Iterator behavior
3682 /// The returned iterator requires that the pattern supports a reverse
3683 /// search, and it will be a [`DoubleEndedIterator`] if a forward/reverse
3684 /// search yields the same elements.
3686 /// [`DoubleEndedIterator`]: iter/trait.DoubleEndedIterator.html
3688 /// For iterating from the front, the [`match_indices`] method can be used.
3690 /// [`match_indices`]: #method.match_indices
3697 /// let v: Vec<_> = "abcXXXabcYYYabc".rmatch_indices("abc").collect();
3698 /// assert_eq!(v, [(12, "abc"), (6, "abc"), (0, "abc")]);
3700 /// let v: Vec<_> = "1abcabc2".rmatch_indices("abc").collect();
3701 /// assert_eq!(v, [(4, "abc"), (1, "abc")]);
3703 /// let v: Vec<_> = "ababa".rmatch_indices("aba").collect();
3704 /// assert_eq!(v, [(2, "aba")]); // only the last `aba`
3706 #[stable(feature = "str_match_indices", since = "1.5.0")]
3708 pub fn rmatch_indices<'a, P>(&'a self, pat: P) -> RMatchIndices<'a, P>
3710 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
3712 RMatchIndices(self.match_indices(pat).0)
3715 /// Returns a string slice with leading and trailing whitespace removed.
3717 /// 'Whitespace' is defined according to the terms of the Unicode Derived
3718 /// Core Property `White_Space`.
3725 /// let s = " Hello\tworld\t";
3727 /// assert_eq!("Hello\tworld", s.trim());
3729 #[must_use = "this returns the trimmed string as a slice, \
3730 without modifying the original"]
3731 #[stable(feature = "rust1", since = "1.0.0")]
3732 pub fn trim(&self) -> &str {
3733 self.trim_matches(|c: char| c.is_whitespace())
3736 /// Returns a string slice with leading whitespace removed.
3738 /// 'Whitespace' is defined according to the terms of the Unicode Derived
3739 /// Core Property `White_Space`.
3741 /// # Text directionality
3743 /// A string is a sequence of bytes. `start` in this context means the first
3744 /// position of that byte string; for a left-to-right language like English or
3745 /// Russian, this will be left side, and for right-to-left languages like
3746 /// Arabic or Hebrew, this will be the right side.
3753 /// let s = " Hello\tworld\t";
3754 /// assert_eq!("Hello\tworld\t", s.trim_start());
3760 /// let s = " English ";
3761 /// assert!(Some('E') == s.trim_start().chars().next());
3763 /// let s = " עברית ";
3764 /// assert!(Some('ע') == s.trim_start().chars().next());
3766 #[must_use = "this returns the trimmed string as a new slice, \
3767 without modifying the original"]
3768 #[stable(feature = "trim_direction", since = "1.30.0")]
3769 pub fn trim_start(&self) -> &str {
3770 self.trim_start_matches(|c: char| c.is_whitespace())
3773 /// Returns a string slice with trailing whitespace removed.
3775 /// 'Whitespace' is defined according to the terms of the Unicode Derived
3776 /// Core Property `White_Space`.
3778 /// # Text directionality
3780 /// A string is a sequence of bytes. `end` in this context means the last
3781 /// position of that byte string; for a left-to-right language like English or
3782 /// Russian, this will be right side, and for right-to-left languages like
3783 /// Arabic or Hebrew, this will be the left side.
3790 /// let s = " Hello\tworld\t";
3791 /// assert_eq!(" Hello\tworld", s.trim_end());
3797 /// let s = " English ";
3798 /// assert!(Some('h') == s.trim_end().chars().rev().next());
3800 /// let s = " עברית ";
3801 /// assert!(Some('ת') == s.trim_end().chars().rev().next());
3803 #[must_use = "this returns the trimmed string as a new slice, \
3804 without modifying the original"]
3805 #[stable(feature = "trim_direction", since = "1.30.0")]
3806 pub fn trim_end(&self) -> &str {
3807 self.trim_end_matches(|c: char| c.is_whitespace())
3810 /// Returns a string slice with leading whitespace removed.
3812 /// 'Whitespace' is defined according to the terms of the Unicode Derived
3813 /// Core Property `White_Space`.
3815 /// # Text directionality
3817 /// A string is a sequence of bytes. 'Left' in this context means the first
3818 /// position of that byte string; for a language like Arabic or Hebrew
3819 /// which are 'right to left' rather than 'left to right', this will be
3820 /// the _right_ side, not the left.
3827 /// let s = " Hello\tworld\t";
3829 /// assert_eq!("Hello\tworld\t", s.trim_left());
3835 /// let s = " English";
3836 /// assert!(Some('E') == s.trim_left().chars().next());
3838 /// let s = " עברית";
3839 /// assert!(Some('ע') == s.trim_left().chars().next());
3841 #[stable(feature = "rust1", since = "1.0.0")]
3844 reason = "superseded by `trim_start`",
3845 suggestion = "trim_start"
3847 pub fn trim_left(&self) -> &str {
3851 /// Returns a string slice with trailing whitespace removed.
3853 /// 'Whitespace' is defined according to the terms of the Unicode Derived
3854 /// Core Property `White_Space`.
3856 /// # Text directionality
3858 /// A string is a sequence of bytes. 'Right' in this context means the last
3859 /// position of that byte string; for a language like Arabic or Hebrew
3860 /// which are 'right to left' rather than 'left to right', this will be
3861 /// the _left_ side, not the right.
3868 /// let s = " Hello\tworld\t";
3870 /// assert_eq!(" Hello\tworld", s.trim_right());
3876 /// let s = "English ";
3877 /// assert!(Some('h') == s.trim_right().chars().rev().next());
3879 /// let s = "עברית ";
3880 /// assert!(Some('ת') == s.trim_right().chars().rev().next());
3882 #[stable(feature = "rust1", since = "1.0.0")]
3885 reason = "superseded by `trim_end`",
3886 suggestion = "trim_end"
3888 pub fn trim_right(&self) -> &str {
3892 /// Returns a string slice with all prefixes and suffixes that match a
3893 /// pattern repeatedly removed.
3895 /// The pattern can be a [`char`] or a closure that determines if a
3896 /// character matches.
3900 /// Simple patterns:
3903 /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
3904 /// assert_eq!("123foo1bar123".trim_matches(char::is_numeric), "foo1bar");
3906 /// let x: &[_] = &['1', '2'];
3907 /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar");
3910 /// A more complex pattern, using a closure:
3913 /// assert_eq!("1foo1barXX".trim_matches(|c| c == '1' || c == 'X'), "foo1bar");
3915 #[must_use = "this returns the trimmed string as a new slice, \
3916 without modifying the original"]
3917 #[stable(feature = "rust1", since = "1.0.0")]
3918 pub fn trim_matches<'a, P>(&'a self, pat: P) -> &'a str
3920 P: Pattern<'a, Searcher: DoubleEndedSearcher<'a>>,
3924 let mut matcher = pat.into_searcher(self);
3925 if let Some((a, b)) = matcher.next_reject() {
3927 j = b; // Remember earliest known match, correct it below if
3928 // last match is different
3930 if let Some((_, b)) = matcher.next_reject_back() {
3933 // SAFETY: `Searcher` is known to return valid indices.
3934 unsafe { self.get_unchecked(i..j) }
3937 /// Returns a string slice with all prefixes that match a pattern
3938 /// repeatedly removed.
3940 /// The pattern can be a `&str`, [`char`], or a closure that determines if
3941 /// a character matches.
3943 /// # Text directionality
3945 /// A string is a sequence of bytes. `start` in this context means the first
3946 /// position of that byte string; for a left-to-right language like English or
3947 /// Russian, this will be left side, and for right-to-left languages like
3948 /// Arabic or Hebrew, this will be the right side.
3955 /// assert_eq!("11foo1bar11".trim_start_matches('1'), "foo1bar11");
3956 /// assert_eq!("123foo1bar123".trim_start_matches(char::is_numeric), "foo1bar123");
3958 /// let x: &[_] = &['1', '2'];
3959 /// assert_eq!("12foo1bar12".trim_start_matches(x), "foo1bar12");
3961 #[must_use = "this returns the trimmed string as a new slice, \
3962 without modifying the original"]
3963 #[stable(feature = "trim_direction", since = "1.30.0")]
3964 pub fn trim_start_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
3965 let mut i = self.len();
3966 let mut matcher = pat.into_searcher(self);
3967 if let Some((a, _)) = matcher.next_reject() {
3970 // SAFETY: `Searcher` is known to return valid indices.
3971 unsafe { self.get_unchecked(i..self.len()) }
3974 /// Returns a string slice with the prefix removed.
3976 /// If the string starts with the pattern `prefix`, `Some` is returned with the substring where
3977 /// the prefix is removed. Unlike `trim_start_matches`, this method removes the prefix exactly
3980 /// If the string does not start with `prefix`, `None` is returned.
3985 /// #![feature(str_strip)]
3987 /// assert_eq!("foobar".strip_prefix("foo"), Some("bar"));
3988 /// assert_eq!("foobar".strip_prefix("bar"), None);
3989 /// assert_eq!("foofoo".strip_prefix("foo"), Some("foo"));
3991 #[must_use = "this returns the remaining substring as a new slice, \
3992 without modifying the original"]
3993 #[unstable(feature = "str_strip", reason = "newly added", issue = "67302")]
3994 pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a str> {
3995 let mut matcher = prefix.into_searcher(self);
3996 if let SearchStep::Match(start, len) = matcher.next() {
3999 "The first search step from Searcher \
4000 must include the first character"
4002 // SAFETY: `Searcher` is known to return valid indices.
4003 unsafe { Some(self.get_unchecked(len..)) }
4009 /// Returns a string slice with the suffix removed.
4011 /// If the string ends with the pattern `suffix`, `Some` is returned with the substring where
4012 /// the suffix is removed. Unlike `trim_end_matches`, this method removes the suffix exactly
4015 /// If the string does not end with `suffix`, `None` is returned.
4020 /// #![feature(str_strip)]
4021 /// assert_eq!("barfoo".strip_suffix("foo"), Some("bar"));
4022 /// assert_eq!("barfoo".strip_suffix("bar"), None);
4023 /// assert_eq!("foofoo".strip_suffix("foo"), Some("foo"));
4025 #[must_use = "this returns the remaining substring as a new slice, \
4026 without modifying the original"]
4027 #[unstable(feature = "str_strip", reason = "newly added", issue = "67302")]
4028 pub fn strip_suffix<'a, P>(&'a self, suffix: P) -> Option<&'a str>
4031 <P as Pattern<'a>>::Searcher: ReverseSearcher<'a>,
4033 let mut matcher = suffix.into_searcher(self);
4034 if let SearchStep::Match(start, end) = matcher.next_back() {
4038 "The first search step from ReverseSearcher \
4039 must include the last character"
4041 // SAFETY: `Searcher` is known to return valid indices.
4042 unsafe { Some(self.get_unchecked(..start)) }
4048 /// Returns a string slice with all suffixes that match a pattern
4049 /// repeatedly removed.
4051 /// The pattern can be a `&str`, [`char`], or a closure that
4052 /// determines if a character matches.
4054 /// # Text directionality
4056 /// A string is a sequence of bytes. `end` in this context means the last
4057 /// position of that byte string; for a left-to-right language like English or
4058 /// Russian, this will be right side, and for right-to-left languages like
4059 /// Arabic or Hebrew, this will be the left side.
4063 /// Simple patterns:
4066 /// assert_eq!("11foo1bar11".trim_end_matches('1'), "11foo1bar");
4067 /// assert_eq!("123foo1bar123".trim_end_matches(char::is_numeric), "123foo1bar");
4069 /// let x: &[_] = &['1', '2'];
4070 /// assert_eq!("12foo1bar12".trim_end_matches(x), "12foo1bar");
4073 /// A more complex pattern, using a closure:
4076 /// assert_eq!("1fooX".trim_end_matches(|c| c == '1' || c == 'X'), "1foo");
4078 #[must_use = "this returns the trimmed string as a new slice, \
4079 without modifying the original"]
4080 #[stable(feature = "trim_direction", since = "1.30.0")]
4081 pub fn trim_end_matches<'a, P>(&'a self, pat: P) -> &'a str
4083 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
4086 let mut matcher = pat.into_searcher(self);
4087 if let Some((_, b)) = matcher.next_reject_back() {
4090 // SAFETY: `Searcher` is known to return valid indices.
4091 unsafe { self.get_unchecked(0..j) }
4094 /// Returns a string slice with all prefixes that match a pattern
4095 /// repeatedly removed.
4097 /// The pattern can be a `&str`, [`char`], or a closure that determines if
4098 /// a character matches.
4100 /// [`char`]: primitive.char.html
4102 /// # Text directionality
4104 /// A string is a sequence of bytes. 'Left' in this context means the first
4105 /// position of that byte string; for a language like Arabic or Hebrew
4106 /// which are 'right to left' rather than 'left to right', this will be
4107 /// the _right_ side, not the left.
4114 /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
4115 /// assert_eq!("123foo1bar123".trim_left_matches(char::is_numeric), "foo1bar123");
4117 /// let x: &[_] = &['1', '2'];
4118 /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12");
4120 #[stable(feature = "rust1", since = "1.0.0")]
4123 reason = "superseded by `trim_start_matches`",
4124 suggestion = "trim_start_matches"
4126 pub fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
4127 self.trim_start_matches(pat)
4130 /// Returns a string slice with all suffixes that match a pattern
4131 /// repeatedly removed.
4133 /// The pattern can be a `&str`, [`char`], or a closure that
4134 /// determines if a character matches.
4136 /// [`char`]: primitive.char.html
4138 /// # Text directionality
4140 /// A string is a sequence of bytes. 'Right' in this context means the last
4141 /// position of that byte string; for a language like Arabic or Hebrew
4142 /// which are 'right to left' rather than 'left to right', this will be
4143 /// the _left_ side, not the right.
4147 /// Simple patterns:
4150 /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
4151 /// assert_eq!("123foo1bar123".trim_right_matches(char::is_numeric), "123foo1bar");
4153 /// let x: &[_] = &['1', '2'];
4154 /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar");
4157 /// A more complex pattern, using a closure:
4160 /// assert_eq!("1fooX".trim_right_matches(|c| c == '1' || c == 'X'), "1foo");
4162 #[stable(feature = "rust1", since = "1.0.0")]
4165 reason = "superseded by `trim_end_matches`",
4166 suggestion = "trim_end_matches"
4168 pub fn trim_right_matches<'a, P>(&'a self, pat: P) -> &'a str
4170 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
4172 self.trim_end_matches(pat)
4175 /// Parses this string slice into another type.
4177 /// Because `parse` is so general, it can cause problems with type
4178 /// inference. As such, `parse` is one of the few times you'll see
4179 /// the syntax affectionately known as the 'turbofish': `::<>`. This
4180 /// helps the inference algorithm understand specifically which type
4181 /// you're trying to parse into.
4183 /// `parse` can parse any type that implements the [`FromStr`] trait.
4185 /// [`FromStr`]: str/trait.FromStr.html
4189 /// Will return [`Err`] if it's not possible to parse this string slice into
4190 /// the desired type.
4192 /// [`Err`]: str/trait.FromStr.html#associatedtype.Err
4199 /// let four: u32 = "4".parse().unwrap();
4201 /// assert_eq!(4, four);
4204 /// Using the 'turbofish' instead of annotating `four`:
4207 /// let four = "4".parse::<u32>();
4209 /// assert_eq!(Ok(4), four);
4212 /// Failing to parse:
4215 /// let nope = "j".parse::<u32>();
4217 /// assert!(nope.is_err());
4220 #[stable(feature = "rust1", since = "1.0.0")]
4221 pub fn parse<F: FromStr>(&self) -> Result<F, F::Err> {
4222 FromStr::from_str(self)
4225 /// Checks if all characters in this string are within the ASCII range.
4230 /// let ascii = "hello!\n";
4231 /// let non_ascii = "Grüße, Jürgen ❤";
4233 /// assert!(ascii.is_ascii());
4234 /// assert!(!non_ascii.is_ascii());
4236 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
4238 pub fn is_ascii(&self) -> bool {
4239 // We can treat each byte as character here: all multibyte characters
4240 // start with a byte that is not in the ascii range, so we will stop
4242 self.bytes().all(|b| b.is_ascii())
4245 /// Checks that two strings are an ASCII case-insensitive match.
4247 /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
4248 /// but without allocating and copying temporaries.
4253 /// assert!("Ferris".eq_ignore_ascii_case("FERRIS"));
4254 /// assert!("Ferrös".eq_ignore_ascii_case("FERRöS"));
4255 /// assert!(!"Ferrös".eq_ignore_ascii_case("FERRÖS"));
4257 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
4259 pub fn eq_ignore_ascii_case(&self, other: &str) -> bool {
4260 self.as_bytes().eq_ignore_ascii_case(other.as_bytes())
4263 /// Converts this string to its ASCII upper case equivalent in-place.
4265 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
4266 /// but non-ASCII letters are unchanged.
4268 /// To return a new uppercased value without modifying the existing one, use
4269 /// [`to_ascii_uppercase`].
4271 /// [`to_ascii_uppercase`]: #method.to_ascii_uppercase
4276 /// let mut s = String::from("Grüße, Jürgen ❤");
4278 /// s.make_ascii_uppercase();
4280 /// assert_eq!("GRüßE, JüRGEN ❤", s);
4282 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
4283 pub fn make_ascii_uppercase(&mut self) {
4284 // SAFETY: safe because we transmute two types with the same layout.
4285 let me = unsafe { self.as_bytes_mut() };
4286 me.make_ascii_uppercase()
4289 /// Converts this string to its ASCII lower case equivalent in-place.
4291 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
4292 /// but non-ASCII letters are unchanged.
4294 /// To return a new lowercased value without modifying the existing one, use
4295 /// [`to_ascii_lowercase`].
4297 /// [`to_ascii_lowercase`]: #method.to_ascii_lowercase
4302 /// let mut s = String::from("GRÜßE, JÜRGEN ❤");
4304 /// s.make_ascii_lowercase();
4306 /// assert_eq!("grÜße, jÜrgen ❤", s);
4308 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
4309 pub fn make_ascii_lowercase(&mut self) {
4310 // SAFETY: safe because we transmute two types with the same layout.
4311 let me = unsafe { self.as_bytes_mut() };
4312 me.make_ascii_lowercase()
4315 /// Return an iterator that escapes each char in `self` with [`char::escape_debug`].
4317 /// Note: only extended grapheme codepoints that begin the string will be
4320 /// [`char::escape_debug`]: ../std/primitive.char.html#method.escape_debug
4327 /// for c in "❤\n!".escape_debug() {
4328 /// print!("{}", c);
4333 /// Using `println!` directly:
4336 /// println!("{}", "❤\n!".escape_debug());
4340 /// Both are equivalent to:
4343 /// println!("❤\\n!");
4346 /// Using `to_string`:
4349 /// assert_eq!("❤\n!".escape_debug().to_string(), "❤\\n!");
4351 #[stable(feature = "str_escape", since = "1.34.0")]
4352 pub fn escape_debug(&self) -> EscapeDebug<'_> {
4353 let mut chars = self.chars();
4357 .map(|first| first.escape_debug_ext(true))
4360 .chain(chars.flat_map(CharEscapeDebugContinue)),
4364 /// Return an iterator that escapes each char in `self` with [`char::escape_default`].
4366 /// [`char::escape_default`]: ../std/primitive.char.html#method.escape_default
4373 /// for c in "❤\n!".escape_default() {
4374 /// print!("{}", c);
4379 /// Using `println!` directly:
4382 /// println!("{}", "❤\n!".escape_default());
4386 /// Both are equivalent to:
4389 /// println!("\\u{{2764}}\\n!");
4392 /// Using `to_string`:
4395 /// assert_eq!("❤\n!".escape_default().to_string(), "\\u{2764}\\n!");
4397 #[stable(feature = "str_escape", since = "1.34.0")]
4398 pub fn escape_default(&self) -> EscapeDefault<'_> {
4399 EscapeDefault { inner: self.chars().flat_map(CharEscapeDefault) }
4402 /// Return an iterator that escapes each char in `self` with [`char::escape_unicode`].
4404 /// [`char::escape_unicode`]: ../std/primitive.char.html#method.escape_unicode
4411 /// for c in "❤\n!".escape_unicode() {
4412 /// print!("{}", c);
4417 /// Using `println!` directly:
4420 /// println!("{}", "❤\n!".escape_unicode());
4424 /// Both are equivalent to:
4427 /// println!("\\u{{2764}}\\u{{a}}\\u{{21}}");
4430 /// Using `to_string`:
4433 /// assert_eq!("❤\n!".escape_unicode().to_string(), "\\u{2764}\\u{a}\\u{21}");
4435 #[stable(feature = "str_escape", since = "1.34.0")]
4436 pub fn escape_unicode(&self) -> EscapeUnicode<'_> {
4437 EscapeUnicode { inner: self.chars().flat_map(CharEscapeUnicode) }
4443 struct CharEscapeDebugContinue impl Fn = |c: char| -> char::EscapeDebug {
4444 c.escape_debug_ext(false)
4448 struct CharEscapeUnicode impl Fn = |c: char| -> char::EscapeUnicode {
4452 struct CharEscapeDefault impl Fn = |c: char| -> char::EscapeDefault {
4457 #[stable(feature = "rust1", since = "1.0.0")]
4458 impl AsRef<[u8]> for str {
4460 fn as_ref(&self) -> &[u8] {
4465 #[stable(feature = "rust1", since = "1.0.0")]
4466 impl Default for &str {
4467 /// Creates an empty str
4468 fn default() -> Self {
4473 #[stable(feature = "default_mut_str", since = "1.28.0")]
4474 impl Default for &mut str {
4475 /// Creates an empty mutable str
4476 fn default() -> Self {
4477 // SAFETY: The empty string is valid UTF-8.
4478 unsafe { from_utf8_unchecked_mut(&mut []) }
4482 /// An iterator over the non-whitespace substrings of a string,
4483 /// separated by any amount of whitespace.
4485 /// This struct is created by the [`split_whitespace`] method on [`str`].
4486 /// See its documentation for more.
4488 /// [`split_whitespace`]: ../../std/primitive.str.html#method.split_whitespace
4489 /// [`str`]: ../../std/primitive.str.html
4490 #[stable(feature = "split_whitespace", since = "1.1.0")]
4491 #[derive(Clone, Debug)]
4492 pub struct SplitWhitespace<'a> {
4493 inner: Filter<Split<'a, IsWhitespace>, IsNotEmpty>,
4496 /// An iterator over the non-ASCII-whitespace substrings of a string,
4497 /// separated by any amount of ASCII whitespace.
4499 /// This struct is created by the [`split_ascii_whitespace`] method on [`str`].
4500 /// See its documentation for more.
4502 /// [`split_ascii_whitespace`]: ../../std/primitive.str.html#method.split_ascii_whitespace
4503 /// [`str`]: ../../std/primitive.str.html
4504 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
4505 #[derive(Clone, Debug)]
4506 pub struct SplitAsciiWhitespace<'a> {
4507 inner: Map<Filter<SliceSplit<'a, u8, IsAsciiWhitespace>, BytesIsNotEmpty>, UnsafeBytesToStr>,
4510 /// An iterator over the substrings of a string,
4511 /// terminated by a substring matching to a predicate function
4512 /// Unlike `Split`, it contains the matched part as a terminator
4513 /// of the subslice.
4515 /// This struct is created by the [`split_inclusive`] method on [`str`].
4516 /// See its documentation for more.
4518 /// [`split_inclusive`]: ../../std/primitive.str.html#method.split_inclusive
4519 /// [`str`]: ../../std/primitive.str.html
4520 #[unstable(feature = "split_inclusive", issue = "none")]
4521 pub struct SplitInclusive<'a, P: Pattern<'a>>(SplitInternal<'a, P>);
4525 struct IsWhitespace impl Fn = |c: char| -> bool {
4530 struct IsAsciiWhitespace impl Fn = |byte: &u8| -> bool {
4531 byte.is_ascii_whitespace()
4535 struct IsNotEmpty impl<'a, 'b> Fn = |s: &'a &'b str| -> bool {
4540 struct BytesIsNotEmpty impl<'a, 'b> Fn = |s: &'a &'b [u8]| -> bool {
4545 struct UnsafeBytesToStr impl<'a> Fn = |bytes: &'a [u8]| -> &'a str {
4547 unsafe { from_utf8_unchecked(bytes) }
4551 #[stable(feature = "split_whitespace", since = "1.1.0")]
4552 impl<'a> Iterator for SplitWhitespace<'a> {
4553 type Item = &'a str;
4556 fn next(&mut self) -> Option<&'a str> {
4561 fn size_hint(&self) -> (usize, Option<usize>) {
4562 self.inner.size_hint()
4566 fn last(mut self) -> Option<&'a str> {
4571 #[stable(feature = "split_whitespace", since = "1.1.0")]
4572 impl<'a> DoubleEndedIterator for SplitWhitespace<'a> {
4574 fn next_back(&mut self) -> Option<&'a str> {
4575 self.inner.next_back()
4579 #[stable(feature = "fused", since = "1.26.0")]
4580 impl FusedIterator for SplitWhitespace<'_> {}
4582 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
4583 impl<'a> Iterator for SplitAsciiWhitespace<'a> {
4584 type Item = &'a str;
4587 fn next(&mut self) -> Option<&'a str> {
4592 fn size_hint(&self) -> (usize, Option<usize>) {
4593 self.inner.size_hint()
4597 fn last(mut self) -> Option<&'a str> {
4602 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
4603 impl<'a> DoubleEndedIterator for SplitAsciiWhitespace<'a> {
4605 fn next_back(&mut self) -> Option<&'a str> {
4606 self.inner.next_back()
4610 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
4611 impl FusedIterator for SplitAsciiWhitespace<'_> {}
4613 #[unstable(feature = "split_inclusive", issue = "none")]
4614 impl<'a, P: Pattern<'a>> Iterator for SplitInclusive<'a, P> {
4615 type Item = &'a str;
4618 fn next(&mut self) -> Option<&'a str> {
4619 self.0.next_inclusive()
4623 #[unstable(feature = "split_inclusive", issue = "none")]
4624 impl<'a, P: Pattern<'a, Searcher: fmt::Debug>> fmt::Debug for SplitInclusive<'a, P> {
4625 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4626 f.debug_struct("SplitInclusive").field("0", &self.0).finish()
4630 // FIXME(#26925) Remove in favor of `#[derive(Clone)]`
4631 #[unstable(feature = "split_inclusive", issue = "none")]
4632 impl<'a, P: Pattern<'a, Searcher: Clone>> Clone for SplitInclusive<'a, P> {
4633 fn clone(&self) -> Self {
4634 SplitInclusive(self.0.clone())
4638 #[unstable(feature = "split_inclusive", issue = "none")]
4639 impl<'a, P: Pattern<'a, Searcher: ReverseSearcher<'a>>> DoubleEndedIterator
4640 for SplitInclusive<'a, P>
4643 fn next_back(&mut self) -> Option<&'a str> {
4644 self.0.next_back_inclusive()
4648 #[unstable(feature = "split_inclusive", issue = "none")]
4649 impl<'a, P: Pattern<'a>> FusedIterator for SplitInclusive<'a, P> {}
4651 /// An iterator of [`u16`] over the string encoded as UTF-16.
4653 /// [`u16`]: ../../std/primitive.u16.html
4655 /// This struct is created by the [`encode_utf16`] method on [`str`].
4656 /// See its documentation for more.
4658 /// [`encode_utf16`]: ../../std/primitive.str.html#method.encode_utf16
4659 /// [`str`]: ../../std/primitive.str.html
4661 #[stable(feature = "encode_utf16", since = "1.8.0")]
4662 pub struct EncodeUtf16<'a> {
4667 #[stable(feature = "collection_debug", since = "1.17.0")]
4668 impl fmt::Debug for EncodeUtf16<'_> {
4669 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4670 f.pad("EncodeUtf16 { .. }")
4674 #[stable(feature = "encode_utf16", since = "1.8.0")]
4675 impl<'a> Iterator for EncodeUtf16<'a> {
4679 fn next(&mut self) -> Option<u16> {
4680 if self.extra != 0 {
4681 let tmp = self.extra;
4686 let mut buf = [0; 2];
4687 self.chars.next().map(|ch| {
4688 let n = ch.encode_utf16(&mut buf).len();
4690 self.extra = buf[1];
4697 fn size_hint(&self) -> (usize, Option<usize>) {
4698 let (low, high) = self.chars.size_hint();
4699 // every char gets either one u16 or two u16,
4700 // so this iterator is between 1 or 2 times as
4701 // long as the underlying iterator.
4702 (low, high.and_then(|n| n.checked_mul(2)))
4706 #[stable(feature = "fused", since = "1.26.0")]
4707 impl FusedIterator for EncodeUtf16<'_> {}
4709 /// The return type of [`str::escape_debug`].
4711 /// [`str::escape_debug`]: ../../std/primitive.str.html#method.escape_debug
4712 #[stable(feature = "str_escape", since = "1.34.0")]
4713 #[derive(Clone, Debug)]
4714 pub struct EscapeDebug<'a> {
4716 Flatten<option::IntoIter<char::EscapeDebug>>,
4717 FlatMap<Chars<'a>, char::EscapeDebug, CharEscapeDebugContinue>,
4721 /// The return type of [`str::escape_default`].
4723 /// [`str::escape_default`]: ../../std/primitive.str.html#method.escape_default
4724 #[stable(feature = "str_escape", since = "1.34.0")]
4725 #[derive(Clone, Debug)]
4726 pub struct EscapeDefault<'a> {
4727 inner: FlatMap<Chars<'a>, char::EscapeDefault, CharEscapeDefault>,
4730 /// The return type of [`str::escape_unicode`].
4732 /// [`str::escape_unicode`]: ../../std/primitive.str.html#method.escape_unicode
4733 #[stable(feature = "str_escape", since = "1.34.0")]
4734 #[derive(Clone, Debug)]
4735 pub struct EscapeUnicode<'a> {
4736 inner: FlatMap<Chars<'a>, char::EscapeUnicode, CharEscapeUnicode>,
4739 macro_rules! escape_types_impls {
4740 ($( $Name: ident ),+) => {$(
4741 #[stable(feature = "str_escape", since = "1.34.0")]
4742 impl<'a> fmt::Display for $Name<'a> {
4743 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4744 self.clone().try_for_each(|c| f.write_char(c))
4748 #[stable(feature = "str_escape", since = "1.34.0")]
4749 impl<'a> Iterator for $Name<'a> {
4753 fn next(&mut self) -> Option<char> { self.inner.next() }
4756 fn size_hint(&self) -> (usize, Option<usize>) { self.inner.size_hint() }
4759 fn try_fold<Acc, Fold, R>(&mut self, init: Acc, fold: Fold) -> R where
4760 Self: Sized, Fold: FnMut(Acc, Self::Item) -> R, R: Try<Ok=Acc>
4762 self.inner.try_fold(init, fold)
4766 fn fold<Acc, Fold>(self, init: Acc, fold: Fold) -> Acc
4767 where Fold: FnMut(Acc, Self::Item) -> Acc,
4769 self.inner.fold(init, fold)
4773 #[stable(feature = "str_escape", since = "1.34.0")]
4774 impl<'a> FusedIterator for $Name<'a> {}
4778 escape_types_impls!(EscapeDebug, EscapeDefault, EscapeUnicode);