1 // ignore-tidy-filelength
3 //! String manipulation.
5 //! For more details, see the [`std::str`] module.
9 #![stable(feature = "rust1", since = "1.0.0")]
11 use self::pattern::Pattern;
12 use self::pattern::{DoubleEndedSearcher, ReverseSearcher, Searcher};
15 use crate::fmt::{self, Write};
16 use crate::iter::{Chain, FlatMap, Flatten};
17 use crate::iter::{Copied, Filter, FusedIterator, Map, TrustedLen, TrustedRandomAccess};
21 use crate::slice::{self, SliceIndex, Split as SliceSplit};
25 #[unstable(feature = "str_internals", issue = "none")]
26 #[allow(missing_docs)]
29 /// Parse a value from a string.
31 /// `FromStr`'s [`from_str`] method is often used implicitly, through
32 /// [`str`]'s [`parse`] method. See [`parse`]'s documentation for examples.
34 /// [`from_str`]: FromStr::from_str
35 /// [`parse`]: str::parse
37 /// `FromStr` does not have a lifetime parameter, and so you can only parse types
38 /// that do not contain a lifetime parameter themselves. In other words, you can
39 /// parse an `i32` with `FromStr`, but not a `&i32`. You can parse a struct that
40 /// contains an `i32`, but not one that contains an `&i32`.
44 /// Basic implementation of `FromStr` on an example `Point` type:
47 /// use std::str::FromStr;
48 /// use std::num::ParseIntError;
50 /// #[derive(Debug, PartialEq)]
56 /// impl FromStr for Point {
57 /// type Err = ParseIntError;
59 /// fn from_str(s: &str) -> Result<Self, Self::Err> {
60 /// let coords: Vec<&str> = s.trim_matches(|p| p == '(' || p == ')' )
64 /// let x_fromstr = coords[0].parse::<i32>()?;
65 /// let y_fromstr = coords[1].parse::<i32>()?;
67 /// Ok(Point { x: x_fromstr, y: y_fromstr })
71 /// let p = Point::from_str("(1,2)");
72 /// assert_eq!(p.unwrap(), Point{ x: 1, y: 2} )
74 #[stable(feature = "rust1", since = "1.0.0")]
75 pub trait FromStr: Sized {
76 /// The associated error which can be returned from parsing.
77 #[stable(feature = "rust1", since = "1.0.0")]
80 /// Parses a string `s` to return a value of this type.
82 /// If parsing succeeds, return the value inside [`Ok`], otherwise
83 /// when the string is ill-formatted return an error wrapped
84 /// inside [`Err`]. The error type is specific to the implementation of the trait.
86 /// [`Ok`]: ../../std/result/enum.Result.html#variant.Ok
87 /// [`Err`]: ../../std/result/enum.Result.html#variant.Err
91 /// Basic usage with [`i32`][ithirtytwo], a type that implements `FromStr`:
93 /// [ithirtytwo]: ../../std/primitive.i32.html
96 /// use std::str::FromStr;
99 /// let x = i32::from_str(s).unwrap();
101 /// assert_eq!(5, x);
103 #[stable(feature = "rust1", since = "1.0.0")]
104 fn from_str(s: &str) -> Result<Self, Self::Err>;
107 #[stable(feature = "rust1", since = "1.0.0")]
108 impl FromStr for bool {
109 type Err = ParseBoolError;
111 /// Parse a `bool` from a string.
113 /// Yields a `Result<bool, ParseBoolError>`, because `s` may or may not
114 /// actually be parseable.
119 /// use std::str::FromStr;
121 /// assert_eq!(FromStr::from_str("true"), Ok(true));
122 /// assert_eq!(FromStr::from_str("false"), Ok(false));
123 /// assert!(<bool as FromStr>::from_str("not even a boolean").is_err());
126 /// Note that in many cases the `.parse()` method on `str` is more idiomatic.
129 /// assert_eq!("true".parse(), Ok(true));
130 /// assert_eq!("false".parse(), Ok(false));
131 /// assert!("not even a boolean".parse::<bool>().is_err());
134 fn from_str(s: &str) -> Result<bool, ParseBoolError> {
137 "false" => Ok(false),
138 _ => Err(ParseBoolError { _priv: () }),
143 /// An error returned when parsing a `bool` using [`from_str`] fails.
145 /// [`from_str`]: FromStr::from_str
146 #[derive(Debug, Clone, PartialEq, Eq)]
147 #[stable(feature = "rust1", since = "1.0.0")]
148 pub struct ParseBoolError {
152 #[stable(feature = "rust1", since = "1.0.0")]
153 impl fmt::Display for ParseBoolError {
154 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
155 "provided string was not `true` or `false`".fmt(f)
160 Section: Creating a string
163 /// Errors which can occur when attempting to interpret a sequence of [`u8`]
166 /// As such, the `from_utf8` family of functions and methods for both [`String`]s
167 /// and [`&str`]s make use of this error, for example.
169 /// [`String`]: ../../std/string/struct.String.html#method.from_utf8
170 /// [`&str`]: from_utf8
174 /// This error type’s methods can be used to create functionality
175 /// similar to `String::from_utf8_lossy` without allocating heap memory:
178 /// fn from_utf8_lossy<F>(mut input: &[u8], mut push: F) where F: FnMut(&str) {
180 /// match std::str::from_utf8(input) {
186 /// let (valid, after_valid) = input.split_at(error.valid_up_to());
188 /// push(std::str::from_utf8_unchecked(valid))
190 /// push("\u{FFFD}");
192 /// if let Some(invalid_sequence_length) = error.error_len() {
193 /// input = &after_valid[invalid_sequence_length..]
202 #[derive(Copy, Eq, PartialEq, Clone, Debug)]
203 #[stable(feature = "rust1", since = "1.0.0")]
204 pub struct Utf8Error {
206 error_len: Option<u8>,
210 /// Returns the index in the given string up to which valid UTF-8 was
213 /// It is the maximum index such that `from_utf8(&input[..index])`
214 /// would return `Ok(_)`.
223 /// // some invalid bytes, in a vector
224 /// let sparkle_heart = vec![0, 159, 146, 150];
226 /// // std::str::from_utf8 returns a Utf8Error
227 /// let error = str::from_utf8(&sparkle_heart).unwrap_err();
229 /// // the second byte is invalid here
230 /// assert_eq!(1, error.valid_up_to());
232 #[stable(feature = "utf8_error", since = "1.5.0")]
233 pub fn valid_up_to(&self) -> usize {
237 /// Provides more information about the failure:
239 /// * `None`: the end of the input was reached unexpectedly.
240 /// `self.valid_up_to()` is 1 to 3 bytes from the end of the input.
241 /// If a byte stream (such as a file or a network socket) is being decoded incrementally,
242 /// this could be a valid `char` whose UTF-8 byte sequence is spanning multiple chunks.
244 /// * `Some(len)`: an unexpected byte was encountered.
245 /// The length provided is that of the invalid byte sequence
246 /// that starts at the index given by `valid_up_to()`.
247 /// Decoding should resume after that sequence
248 /// (after inserting a [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD]) in case of
251 /// [U+FFFD]: ../../std/char/constant.REPLACEMENT_CHARACTER.html
252 #[stable(feature = "utf8_error_error_len", since = "1.20.0")]
253 pub fn error_len(&self) -> Option<usize> {
254 self.error_len.map(|len| len as usize)
258 /// Converts a slice of bytes to a string slice.
260 /// A string slice ([`&str`]) is made of bytes ([`u8`]), and a byte slice
261 /// ([`&[u8]`][byteslice]) is made of bytes, so this function converts between
262 /// the two. Not all byte slices are valid string slices, however: [`&str`] requires
263 /// that it is valid UTF-8. `from_utf8()` checks to ensure that the bytes are valid
264 /// UTF-8, and then does the conversion.
267 /// [byteslice]: ../../std/primitive.slice.html
269 /// If you are sure that the byte slice is valid UTF-8, and you don't want to
270 /// incur the overhead of the validity check, there is an unsafe version of
271 /// this function, [`from_utf8_unchecked`][fromutf8u], which has the same
272 /// behavior but skips the check.
274 /// [fromutf8u]: fn.from_utf8_unchecked.html
276 /// If you need a `String` instead of a `&str`, consider
277 /// [`String::from_utf8`][string].
279 /// [string]: ../../std/string/struct.String.html#method.from_utf8
281 /// Because you can stack-allocate a `[u8; N]`, and you can take a
282 /// [`&[u8]`][byteslice] of it, this function is one way to have a
283 /// stack-allocated string. There is an example of this in the
284 /// examples section below.
286 /// [byteslice]: ../../std/primitive.slice.html
290 /// Returns `Err` if the slice is not UTF-8 with a description as to why the
291 /// provided slice is not UTF-8.
300 /// // some bytes, in a vector
301 /// let sparkle_heart = vec![240, 159, 146, 150];
303 /// // We know these bytes are valid, so just use `unwrap()`.
304 /// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
306 /// assert_eq!("💖", sparkle_heart);
314 /// // some invalid bytes, in a vector
315 /// let sparkle_heart = vec![0, 159, 146, 150];
317 /// assert!(str::from_utf8(&sparkle_heart).is_err());
320 /// See the docs for [`Utf8Error`][error] for more details on the kinds of
321 /// errors that can be returned.
323 /// [error]: struct.Utf8Error.html
325 /// A "stack allocated string":
330 /// // some bytes, in a stack-allocated array
331 /// let sparkle_heart = [240, 159, 146, 150];
333 /// // We know these bytes are valid, so just use `unwrap()`.
334 /// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
336 /// assert_eq!("💖", sparkle_heart);
338 #[stable(feature = "rust1", since = "1.0.0")]
339 pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
340 run_utf8_validation(v)?;
341 // SAFETY: Just ran validation.
342 Ok(unsafe { from_utf8_unchecked(v) })
345 /// Converts a mutable slice of bytes to a mutable string slice.
354 /// // "Hello, Rust!" as a mutable vector
355 /// let mut hellorust = vec![72, 101, 108, 108, 111, 44, 32, 82, 117, 115, 116, 33];
357 /// // As we know these bytes are valid, we can use `unwrap()`
358 /// let outstr = str::from_utf8_mut(&mut hellorust).unwrap();
360 /// assert_eq!("Hello, Rust!", outstr);
368 /// // Some invalid bytes in a mutable vector
369 /// let mut invalid = vec![128, 223];
371 /// assert!(str::from_utf8_mut(&mut invalid).is_err());
373 /// See the docs for [`Utf8Error`][error] for more details on the kinds of
374 /// errors that can be returned.
376 /// [error]: struct.Utf8Error.html
377 #[stable(feature = "str_mut_extras", since = "1.20.0")]
378 pub fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
379 run_utf8_validation(v)?;
380 // SAFETY: Just ran validation.
381 Ok(unsafe { from_utf8_unchecked_mut(v) })
384 /// Converts a slice of bytes to a string slice without checking
385 /// that the string contains valid UTF-8.
387 /// See the safe version, [`from_utf8`][fromutf8], for more information.
389 /// [fromutf8]: fn.from_utf8.html
393 /// This function is unsafe because it does not check that the bytes passed to
394 /// it are valid UTF-8. If this constraint is violated, undefined behavior
395 /// results, as the rest of Rust assumes that [`&str`]s are valid UTF-8.
406 /// // some bytes, in a vector
407 /// let sparkle_heart = vec![240, 159, 146, 150];
409 /// let sparkle_heart = unsafe {
410 /// str::from_utf8_unchecked(&sparkle_heart)
413 /// assert_eq!("💖", sparkle_heart);
416 #[stable(feature = "rust1", since = "1.0.0")]
417 pub unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
418 // SAFETY: the caller must guarantee that the bytes `v`
419 // are valid UTF-8, thus the cast to `*const str` is safe.
420 // Also, the pointer dereference is safe because that pointer
421 // comes from a reference which is guaranteed to be valid for reads.
422 unsafe { &*(v as *const [u8] as *const str) }
425 /// Converts a slice of bytes to a string slice without checking
426 /// that the string contains valid UTF-8; mutable version.
428 /// See the immutable version, [`from_utf8_unchecked()`], for more information.
437 /// let mut heart = vec![240, 159, 146, 150];
438 /// let heart = unsafe { str::from_utf8_unchecked_mut(&mut heart) };
440 /// assert_eq!("💖", heart);
443 #[stable(feature = "str_mut_extras", since = "1.20.0")]
444 pub unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str {
445 // SAFETY: the caller must guarantee that the bytes `v`
446 // are valid UTF-8, thus the cast to `*mut str` is safe.
447 // Also, the pointer dereference is safe because that pointer
448 // comes from a reference which is guaranteed to be valid for writes.
449 unsafe { &mut *(v as *mut [u8] as *mut str) }
452 #[stable(feature = "rust1", since = "1.0.0")]
453 impl fmt::Display for Utf8Error {
454 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
455 if let Some(error_len) = self.error_len {
458 "invalid utf-8 sequence of {} bytes from index {}",
459 error_len, self.valid_up_to
462 write!(f, "incomplete utf-8 byte sequence from index {}", self.valid_up_to)
471 /// An iterator over the [`char`]s of a string slice.
474 /// This struct is created by the [`chars`] method on [`str`].
475 /// See its documentation for more.
477 /// [`chars`]: str::chars
479 #[stable(feature = "rust1", since = "1.0.0")]
480 pub struct Chars<'a> {
481 iter: slice::Iter<'a, u8>,
484 /// Returns the initial code point accumulator for the first byte.
485 /// The first byte is special: only the bottom 5 bits are wanted for width 2,
486 /// 4 bits for width 3, and 3 bits for width 4.
488 fn utf8_first_byte(byte: u8, width: u32) -> u32 {
489 (byte & (0x7F >> width)) as u32
492 /// Returns the value of `ch` updated with continuation byte `byte`.
494 fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
495 (ch << 6) | (byte & CONT_MASK) as u32
498 /// Checks whether the byte is a UTF-8 continuation byte (i.e., starts with the
501 fn utf8_is_cont_byte(byte: u8) -> bool {
502 (byte & !CONT_MASK) == TAG_CONT_U8
506 fn unwrap_or_0(opt: Option<&u8>) -> u8 {
513 /// Reads the next code point out of a byte iterator (assuming a
514 /// UTF-8-like encoding).
515 #[unstable(feature = "str_internals", issue = "none")]
517 pub fn next_code_point<'a, I: Iterator<Item = &'a u8>>(bytes: &mut I) -> Option<u32> {
519 let x = *bytes.next()?;
521 return Some(x as u32);
524 // Multibyte case follows
525 // Decode from a byte combination out of: [[[x y] z] w]
526 // NOTE: Performance is sensitive to the exact formulation here
527 let init = utf8_first_byte(x, 2);
528 let y = unwrap_or_0(bytes.next());
529 let mut ch = utf8_acc_cont_byte(init, y);
532 // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
533 let z = unwrap_or_0(bytes.next());
534 let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
535 ch = init << 12 | y_z;
538 // use only the lower 3 bits of `init`
539 let w = unwrap_or_0(bytes.next());
540 ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
547 /// Reads the last code point out of a byte iterator (assuming a
548 /// UTF-8-like encoding).
550 fn next_code_point_reverse<'a, I>(bytes: &mut I) -> Option<u32>
552 I: DoubleEndedIterator<Item = &'a u8>,
555 let w = match *bytes.next_back()? {
556 next_byte if next_byte < 128 => return Some(next_byte as u32),
557 back_byte => back_byte,
560 // Multibyte case follows
561 // Decode from a byte combination out of: [x [y [z w]]]
563 let z = unwrap_or_0(bytes.next_back());
564 ch = utf8_first_byte(z, 2);
565 if utf8_is_cont_byte(z) {
566 let y = unwrap_or_0(bytes.next_back());
567 ch = utf8_first_byte(y, 3);
568 if utf8_is_cont_byte(y) {
569 let x = unwrap_or_0(bytes.next_back());
570 ch = utf8_first_byte(x, 4);
571 ch = utf8_acc_cont_byte(ch, y);
573 ch = utf8_acc_cont_byte(ch, z);
575 ch = utf8_acc_cont_byte(ch, w);
580 #[stable(feature = "rust1", since = "1.0.0")]
581 impl<'a> Iterator for Chars<'a> {
585 fn next(&mut self) -> Option<char> {
586 next_code_point(&mut self.iter).map(|ch| {
587 // SAFETY: `str` invariant says `ch` is a valid Unicode Scalar Value.
588 unsafe { char::from_u32_unchecked(ch) }
593 fn count(self) -> usize {
594 // length in `char` is equal to the number of non-continuation bytes
595 let bytes_len = self.iter.len();
596 let mut cont_bytes = 0;
597 for &byte in self.iter {
598 cont_bytes += utf8_is_cont_byte(byte) as usize;
600 bytes_len - cont_bytes
604 fn size_hint(&self) -> (usize, Option<usize>) {
605 let len = self.iter.len();
606 // `(len + 3)` can't overflow, because we know that the `slice::Iter`
607 // belongs to a slice in memory which has a maximum length of
608 // `isize::MAX` (that's well below `usize::MAX`).
609 ((len + 3) / 4, Some(len))
613 fn last(mut self) -> Option<char> {
614 // No need to go through the entire string.
619 #[stable(feature = "chars_debug_impl", since = "1.38.0")]
620 impl fmt::Debug for Chars<'_> {
621 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
622 write!(f, "Chars(")?;
623 f.debug_list().entries(self.clone()).finish()?;
629 #[stable(feature = "rust1", since = "1.0.0")]
630 impl<'a> DoubleEndedIterator for Chars<'a> {
632 fn next_back(&mut self) -> Option<char> {
633 next_code_point_reverse(&mut self.iter).map(|ch| {
634 // SAFETY: `str` invariant says `ch` is a valid Unicode Scalar Value.
635 unsafe { char::from_u32_unchecked(ch) }
640 #[stable(feature = "fused", since = "1.26.0")]
641 impl FusedIterator for Chars<'_> {}
644 /// Views the underlying data as a subslice of the original data.
646 /// This has the same lifetime as the original slice, and so the
647 /// iterator can continue to be used while this exists.
652 /// let mut chars = "abc".chars();
654 /// assert_eq!(chars.as_str(), "abc");
656 /// assert_eq!(chars.as_str(), "bc");
659 /// assert_eq!(chars.as_str(), "");
661 #[stable(feature = "iter_to_slice", since = "1.4.0")]
663 pub fn as_str(&self) -> &'a str {
664 // SAFETY: `Chars` is only made from a str, which guarantees the iter is valid UTF-8.
665 unsafe { from_utf8_unchecked(self.iter.as_slice()) }
669 /// An iterator over the [`char`]s of a string slice, and their positions.
671 /// This struct is created by the [`char_indices`] method on [`str`].
672 /// See its documentation for more.
674 /// [`char_indices`]: str::char_indices
675 #[derive(Clone, Debug)]
676 #[stable(feature = "rust1", since = "1.0.0")]
677 pub struct CharIndices<'a> {
682 #[stable(feature = "rust1", since = "1.0.0")]
683 impl<'a> Iterator for CharIndices<'a> {
684 type Item = (usize, char);
687 fn next(&mut self) -> Option<(usize, char)> {
688 let pre_len = self.iter.iter.len();
689 match self.iter.next() {
692 let index = self.front_offset;
693 let len = self.iter.iter.len();
694 self.front_offset += pre_len - len;
701 fn count(self) -> usize {
706 fn size_hint(&self) -> (usize, Option<usize>) {
707 self.iter.size_hint()
711 fn last(mut self) -> Option<(usize, char)> {
712 // No need to go through the entire string.
717 #[stable(feature = "rust1", since = "1.0.0")]
718 impl<'a> DoubleEndedIterator for CharIndices<'a> {
720 fn next_back(&mut self) -> Option<(usize, char)> {
721 self.iter.next_back().map(|ch| {
722 let index = self.front_offset + self.iter.iter.len();
728 #[stable(feature = "fused", since = "1.26.0")]
729 impl FusedIterator for CharIndices<'_> {}
731 impl<'a> CharIndices<'a> {
732 /// Views the underlying data as a subslice of the original data.
734 /// This has the same lifetime as the original slice, and so the
735 /// iterator can continue to be used while this exists.
736 #[stable(feature = "iter_to_slice", since = "1.4.0")]
738 pub fn as_str(&self) -> &'a str {
743 /// An iterator over the bytes of a string slice.
745 /// This struct is created by the [`bytes`] method on [`str`].
746 /// See its documentation for more.
748 /// [`bytes`]: str::bytes
749 #[stable(feature = "rust1", since = "1.0.0")]
750 #[derive(Clone, Debug)]
751 pub struct Bytes<'a>(Copied<slice::Iter<'a, u8>>);
753 #[stable(feature = "rust1", since = "1.0.0")]
754 impl Iterator for Bytes<'_> {
758 fn next(&mut self) -> Option<u8> {
763 fn size_hint(&self) -> (usize, Option<usize>) {
768 fn count(self) -> usize {
773 fn last(self) -> Option<Self::Item> {
778 fn nth(&mut self, n: usize) -> Option<Self::Item> {
783 fn all<F>(&mut self, f: F) -> bool
785 F: FnMut(Self::Item) -> bool,
791 fn any<F>(&mut self, f: F) -> bool
793 F: FnMut(Self::Item) -> bool,
799 fn find<P>(&mut self, predicate: P) -> Option<Self::Item>
801 P: FnMut(&Self::Item) -> bool,
803 self.0.find(predicate)
807 fn position<P>(&mut self, predicate: P) -> Option<usize>
809 P: FnMut(Self::Item) -> bool,
811 self.0.position(predicate)
815 fn rposition<P>(&mut self, predicate: P) -> Option<usize>
817 P: FnMut(Self::Item) -> bool,
819 self.0.rposition(predicate)
823 #[stable(feature = "rust1", since = "1.0.0")]
824 impl DoubleEndedIterator for Bytes<'_> {
826 fn next_back(&mut self) -> Option<u8> {
831 fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
836 fn rfind<P>(&mut self, predicate: P) -> Option<Self::Item>
838 P: FnMut(&Self::Item) -> bool,
840 self.0.rfind(predicate)
844 #[stable(feature = "rust1", since = "1.0.0")]
845 impl ExactSizeIterator for Bytes<'_> {
847 fn len(&self) -> usize {
852 fn is_empty(&self) -> bool {
857 #[stable(feature = "fused", since = "1.26.0")]
858 impl FusedIterator for Bytes<'_> {}
860 #[unstable(feature = "trusted_len", issue = "37572")]
861 unsafe impl TrustedLen for Bytes<'_> {}
864 unsafe impl TrustedRandomAccess for Bytes<'_> {
865 unsafe fn get_unchecked(&mut self, i: usize) -> u8 {
866 // SAFETY: the caller must uphold the safety contract
867 // for `TrustedRandomAccess::get_unchecked`.
868 unsafe { self.0.get_unchecked(i) }
870 fn may_have_side_effect() -> bool {
875 /// This macro generates a `Clone` impl for string pattern API
876 /// wrapper types of the form `X<'a, P>`.
877 macro_rules! derive_pattern_clone {
878 (clone $t:ident with |$s:ident| $e:expr) => {
879 impl<'a, P> Clone for $t<'a, P>
881 P: Pattern<'a, Searcher: Clone>,
883 fn clone(&self) -> Self {
891 /// This macro generates two public iterator structs
892 /// wrapping a private internal one that makes use of the `Pattern` API.
894 /// For all patterns `P: Pattern<'a>` the following items will be
895 /// generated (generics omitted):
897 /// struct $forward_iterator($internal_iterator);
898 /// struct $reverse_iterator($internal_iterator);
900 /// impl Iterator for $forward_iterator
901 /// { /* internal ends up calling Searcher::next_match() */ }
903 /// impl DoubleEndedIterator for $forward_iterator
904 /// where P::Searcher: DoubleEndedSearcher
905 /// { /* internal ends up calling Searcher::next_match_back() */ }
907 /// impl Iterator for $reverse_iterator
908 /// where P::Searcher: ReverseSearcher
909 /// { /* internal ends up calling Searcher::next_match_back() */ }
911 /// impl DoubleEndedIterator for $reverse_iterator
912 /// where P::Searcher: DoubleEndedSearcher
913 /// { /* internal ends up calling Searcher::next_match() */ }
915 /// The internal one is defined outside the macro, and has almost the same
916 /// semantics as a `DoubleEndedIterator` by delegating to `pattern::Searcher` and
917 /// `pattern::ReverseSearcher` for both forward and reverse iteration.
919 /// "Almost", because a `Searcher` and a `ReverseSearcher` for a given
920 /// `Pattern` might not return the same elements, so actually implementing
921 /// `DoubleEndedIterator` for it would be incorrect.
922 /// (See the docs in `str::pattern` for more details)
924 /// However, the internal struct still represents a single ended iterator from
925 /// either end, and depending on pattern is also a valid double ended iterator,
926 /// so the two wrapper structs implement `Iterator`
927 /// and `DoubleEndedIterator` depending on the concrete pattern type, leading
928 /// to the complex impls seen above.
929 macro_rules! generate_pattern_iterators {
933 $(#[$forward_iterator_attribute:meta])*
934 struct $forward_iterator:ident;
938 $(#[$reverse_iterator_attribute:meta])*
939 struct $reverse_iterator:ident;
941 // Stability of all generated items
943 $(#[$common_stability_attribute:meta])*
945 // Internal almost-iterator that is being delegated to
947 $internal_iterator:ident yielding ($iterty:ty);
949 // Kind of delegation - either single ended or double ended
952 $(#[$forward_iterator_attribute])*
953 $(#[$common_stability_attribute])*
954 pub struct $forward_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);
956 $(#[$common_stability_attribute])*
957 impl<'a, P> fmt::Debug for $forward_iterator<'a, P>
959 P: Pattern<'a, Searcher: fmt::Debug>,
961 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
962 f.debug_tuple(stringify!($forward_iterator))
968 $(#[$common_stability_attribute])*
969 impl<'a, P: Pattern<'a>> Iterator for $forward_iterator<'a, P> {
973 fn next(&mut self) -> Option<$iterty> {
978 $(#[$common_stability_attribute])*
979 impl<'a, P> Clone for $forward_iterator<'a, P>
981 P: Pattern<'a, Searcher: Clone>,
983 fn clone(&self) -> Self {
984 $forward_iterator(self.0.clone())
988 $(#[$reverse_iterator_attribute])*
989 $(#[$common_stability_attribute])*
990 pub struct $reverse_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);
992 $(#[$common_stability_attribute])*
993 impl<'a, P> fmt::Debug for $reverse_iterator<'a, P>
995 P: Pattern<'a, Searcher: fmt::Debug>,
997 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
998 f.debug_tuple(stringify!($reverse_iterator))
1004 $(#[$common_stability_attribute])*
1005 impl<'a, P> Iterator for $reverse_iterator<'a, P>
1007 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
1009 type Item = $iterty;
1012 fn next(&mut self) -> Option<$iterty> {
1017 $(#[$common_stability_attribute])*
1018 impl<'a, P> Clone for $reverse_iterator<'a, P>
1020 P: Pattern<'a, Searcher: Clone>,
1022 fn clone(&self) -> Self {
1023 $reverse_iterator(self.0.clone())
1027 #[stable(feature = "fused", since = "1.26.0")]
1028 impl<'a, P: Pattern<'a>> FusedIterator for $forward_iterator<'a, P> {}
1030 #[stable(feature = "fused", since = "1.26.0")]
1031 impl<'a, P> FusedIterator for $reverse_iterator<'a, P>
1033 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
1036 generate_pattern_iterators!($($t)* with $(#[$common_stability_attribute])*,
1038 $reverse_iterator, $iterty);
1041 double ended; with $(#[$common_stability_attribute:meta])*,
1042 $forward_iterator:ident,
1043 $reverse_iterator:ident, $iterty:ty
1045 $(#[$common_stability_attribute])*
1046 impl<'a, P> DoubleEndedIterator for $forward_iterator<'a, P>
1048 P: Pattern<'a, Searcher: DoubleEndedSearcher<'a>>,
1051 fn next_back(&mut self) -> Option<$iterty> {
1056 $(#[$common_stability_attribute])*
1057 impl<'a, P> DoubleEndedIterator for $reverse_iterator<'a, P>
1059 P: Pattern<'a, Searcher: DoubleEndedSearcher<'a>>,
1062 fn next_back(&mut self) -> Option<$iterty> {
1068 single ended; with $(#[$common_stability_attribute:meta])*,
1069 $forward_iterator:ident,
1070 $reverse_iterator:ident, $iterty:ty
1074 derive_pattern_clone! {
1076 with |s| SplitInternal { matcher: s.matcher.clone(), ..*s }
1079 struct SplitInternal<'a, P: Pattern<'a>> {
1082 matcher: P::Searcher,
1083 allow_trailing_empty: bool,
1087 impl<'a, P> fmt::Debug for SplitInternal<'a, P>
1089 P: Pattern<'a, Searcher: fmt::Debug>,
1091 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1092 f.debug_struct("SplitInternal")
1093 .field("start", &self.start)
1094 .field("end", &self.end)
1095 .field("matcher", &self.matcher)
1096 .field("allow_trailing_empty", &self.allow_trailing_empty)
1097 .field("finished", &self.finished)
1102 impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
1104 fn get_end(&mut self) -> Option<&'a str> {
1105 if !self.finished && (self.allow_trailing_empty || self.end - self.start > 0) {
1106 self.finished = true;
1107 // SAFETY: `self.start` and `self.end` always lie on unicode boundaries.
1109 let string = self.matcher.haystack().get_unchecked(self.start..self.end);
1118 fn next(&mut self) -> Option<&'a str> {
1123 let haystack = self.matcher.haystack();
1124 match self.matcher.next_match() {
1125 // SAFETY: `Searcher` guarantees that `a` and `b` lie on unicode boundaries.
1126 Some((a, b)) => unsafe {
1127 let elt = haystack.get_unchecked(self.start..a);
1131 None => self.get_end(),
1136 fn next_inclusive(&mut self) -> Option<&'a str> {
1141 let haystack = self.matcher.haystack();
1142 match self.matcher.next_match() {
1143 // SAFETY: `Searcher` guarantees that `b` lies on a unicode boundary,
1144 // and self.start is either the start of the original string,
1145 // or `b` was assigned to it, so it also lies on a unicode boundary.
1146 Some((_, b)) => unsafe {
1147 let elt = haystack.get_unchecked(self.start..b);
1151 None => self.get_end(),
1156 fn next_back(&mut self) -> Option<&'a str>
1158 P::Searcher: ReverseSearcher<'a>,
1164 if !self.allow_trailing_empty {
1165 self.allow_trailing_empty = true;
1166 match self.next_back() {
1167 Some(elt) if !elt.is_empty() => return Some(elt),
1176 let haystack = self.matcher.haystack();
1177 match self.matcher.next_match_back() {
1178 // SAFETY: `Searcher` guarantees that `a` and `b` lie on unicode boundaries.
1179 Some((a, b)) => unsafe {
1180 let elt = haystack.get_unchecked(b..self.end);
1184 // SAFETY: `self.start` and `self.end` always lie on unicode boundaries.
1186 self.finished = true;
1187 Some(haystack.get_unchecked(self.start..self.end))
1193 fn next_back_inclusive(&mut self) -> Option<&'a str>
1195 P::Searcher: ReverseSearcher<'a>,
1201 if !self.allow_trailing_empty {
1202 self.allow_trailing_empty = true;
1203 match self.next_back_inclusive() {
1204 Some(elt) if !elt.is_empty() => return Some(elt),
1213 let haystack = self.matcher.haystack();
1214 match self.matcher.next_match_back() {
1215 // SAFETY: `Searcher` guarantees that `b` lies on a unicode boundary,
1216 // and self.end is either the end of the original string,
1217 // or `b` was assigned to it, so it also lies on a unicode boundary.
1218 Some((_, b)) => unsafe {
1219 let elt = haystack.get_unchecked(b..self.end);
1223 // SAFETY: self.start is either the start of the original string,
1224 // or the start of a substring that represents the part of the string that hasn't
1225 // been iterated yet. Either way, it is guaranteed to lie on a unicode boundary.
1226 // self.end is either the end of the original string,
1227 // or `b` was assigned to it, so it also lies on a unicode boundary.
1229 self.finished = true;
1230 Some(haystack.get_unchecked(self.start..self.end))
1236 generate_pattern_iterators! {
1238 /// Created with the method [`split`].
1240 /// [`split`]: str::split
1243 /// Created with the method [`rsplit`].
1245 /// [`rsplit`]: str::rsplit
1248 #[stable(feature = "rust1", since = "1.0.0")]
1250 SplitInternal yielding (&'a str);
1251 delegate double ended;
1254 generate_pattern_iterators! {
1256 /// Created with the method [`split_terminator`].
1258 /// [`split_terminator`]: str::split_terminator
1259 struct SplitTerminator;
1261 /// Created with the method [`rsplit_terminator`].
1263 /// [`rsplit_terminator`]: str::rsplit_terminator
1264 struct RSplitTerminator;
1266 #[stable(feature = "rust1", since = "1.0.0")]
1268 SplitInternal yielding (&'a str);
1269 delegate double ended;
1272 derive_pattern_clone! {
1273 clone SplitNInternal
1274 with |s| SplitNInternal { iter: s.iter.clone(), ..*s }
1277 struct SplitNInternal<'a, P: Pattern<'a>> {
1278 iter: SplitInternal<'a, P>,
1279 /// The number of splits remaining
1283 impl<'a, P> fmt::Debug for SplitNInternal<'a, P>
1285 P: Pattern<'a, Searcher: fmt::Debug>,
1287 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1288 f.debug_struct("SplitNInternal")
1289 .field("iter", &self.iter)
1290 .field("count", &self.count)
1295 impl<'a, P: Pattern<'a>> SplitNInternal<'a, P> {
1297 fn next(&mut self) -> Option<&'a str> {
1312 fn next_back(&mut self) -> Option<&'a str>
1314 P::Searcher: ReverseSearcher<'a>,
1324 self.iter.next_back()
1330 generate_pattern_iterators! {
1332 /// Created with the method [`splitn`].
1334 /// [`splitn`]: str::splitn
1337 /// Created with the method [`rsplitn`].
1339 /// [`rsplitn`]: str::rsplitn
1342 #[stable(feature = "rust1", since = "1.0.0")]
1344 SplitNInternal yielding (&'a str);
1345 delegate single ended;
1348 derive_pattern_clone! {
1349 clone MatchIndicesInternal
1350 with |s| MatchIndicesInternal(s.0.clone())
1353 struct MatchIndicesInternal<'a, P: Pattern<'a>>(P::Searcher);
1355 impl<'a, P> fmt::Debug for MatchIndicesInternal<'a, P>
1357 P: Pattern<'a, Searcher: fmt::Debug>,
1359 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1360 f.debug_tuple("MatchIndicesInternal").field(&self.0).finish()
1364 impl<'a, P: Pattern<'a>> MatchIndicesInternal<'a, P> {
// Turns a `(start, end)` byte range reported by the searcher into
// `(start, matched_slice)`, where the slice borrows from the haystack.
1366 fn next(&mut self) -> Option<(usize, &'a str)> {
1369 // SAFETY: `Searcher` guarantees that `start` and `end` lie on unicode boundaries.
1370 .map(|(start, end)| unsafe { (start, self.0.haystack().get_unchecked(start..end)) })
// Same mapping as `next`, for iteration from the back; only available when
// the searcher supports reverse search.
1374 fn next_back(&mut self) -> Option<(usize, &'a str)>
1376 P::Searcher: ReverseSearcher<'a>,
1380 // SAFETY: `Searcher` guarantees that `start` and `end` lie on unicode boundaries.
1381 .map(|(start, end)| unsafe { (start, self.0.haystack().get_unchecked(start..end)) })
// Generates the public `MatchIndices` / `RMatchIndices` iterators on top of
// `MatchIndicesInternal`; items are `(usize, &'a str)` pairs.
1385 generate_pattern_iterators! {
1387 /// Created with the method [`match_indices`].
1389 /// [`match_indices`]: str::match_indices
1390 struct MatchIndices;
1392 /// Created with the method [`rmatch_indices`].
1394 /// [`rmatch_indices`]: str::rmatch_indices
1395 struct RMatchIndices;
1397 #[stable(feature = "str_match_indices", since = "1.5.0")]
1399 MatchIndicesInternal yielding ((usize, &'a str));
1400 delegate double ended;
// Manual `Clone` for `MatchesInternal`: clones the wrapped searcher.
1403 derive_pattern_clone! {
1404 clone MatchesInternal
1405 with |s| MatchesInternal(s.0.clone())
// Newtype over the pattern's searcher; the engine behind the `matches` and
// `rmatches` iterators.
1408 struct MatchesInternal<'a, P: Pattern<'a>>(P::Searcher);
// `Debug` requires a `Debug` searcher and prints it as the single tuple field.
1410 impl<'a, P> fmt::Debug for MatchesInternal<'a, P>
1412 P: Pattern<'a, Searcher: fmt::Debug>,
1414 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1415 f.debug_tuple("MatchesInternal").field(&self.0).finish()
1419 impl<'a, P: Pattern<'a>> MatchesInternal<'a, P> {
// Yields the text of the next match reported by the searcher.
1421 fn next(&mut self) -> Option<&'a str> {
1422 // SAFETY: `Searcher` guarantees that the match bounds `a` and `b` lie on unicode boundaries.
1423 self.0.next_match().map(|(a, b)| unsafe {
1424 // Indices are known to be on utf8 boundaries
1425 self.0.haystack().get_unchecked(a..b)
// Yields the text of the next match when searching from the back; only
// available when the searcher supports reverse search.
1430 fn next_back(&mut self) -> Option<&'a str>
1432 P::Searcher: ReverseSearcher<'a>,
1434 // SAFETY: `Searcher` guarantees that the match bounds `a` and `b` lie on unicode boundaries.
1435 self.0.next_match_back().map(|(a, b)| unsafe {
1436 // Indices are known to be on utf8 boundaries
1437 self.0.haystack().get_unchecked(a..b)
// Generates the public iterator types for `str::matches` / `str::rmatches` on
// top of `MatchesInternal`; each yields `&'a str`.
1442 generate_pattern_iterators! {
1444 /// Created with the method [`matches`].
1446 /// [`matches`]: str::matches
1449 /// Created with the method [`rmatches`].
1451 /// [`rmatches`]: str::rmatches
1454 #[stable(feature = "str_matches", since = "1.2.0")]
1456 MatchesInternal yielding (&'a str);
1457 delegate double ended;
1460 /// An iterator over the lines of a string, as string slices.
1462 /// This struct is created with the [`lines`] method on [`str`].
1463 /// See its documentation for more.
1465 /// [`lines`]: str::lines
1466 #[stable(feature = "rust1", since = "1.0.0")]
1467 #[derive(Clone, Debug)]
// Implemented as a `char`-pattern `SplitTerminator` whose pieces are passed
// through `LinesAnyMap`, which trims a trailing `\r` from each piece.
1468 pub struct Lines<'a>(Map<SplitTerminator<'a, char>, LinesAnyMap>);
// Forward iteration over line slices.
// NOTE(review): the method bodies are not visible here; presumably each one
// forwards to the wrapped `Map` adapter -- confirm against the full source.
1470 #[stable(feature = "rust1", since = "1.0.0")]
1471 impl<'a> Iterator for Lines<'a> {
1472 type Item = &'a str;
1475 fn next(&mut self) -> Option<&'a str> {
1480 fn size_hint(&self) -> (usize, Option<usize>) {
// `last` is overridden rather than left to the default implementation.
1485 fn last(mut self) -> Option<&'a str> {
// Lines can also be taken from the end of the string.
1490 #[stable(feature = "rust1", since = "1.0.0")]
1491 impl<'a> DoubleEndedIterator for Lines<'a> {
1493 fn next_back(&mut self) -> Option<&'a str> {
// Marker impl: declares `Lines` as a fused iterator.
1498 #[stable(feature = "fused", since = "1.26.0")]
1499 impl FusedIterator for Lines<'_> {}
1501 /// Created with the method [`lines_any`].
1503 /// [`lines_any`]: str::lines_any
1504 #[stable(feature = "rust1", since = "1.0.0")]
1505 #[rustc_deprecated(since = "1.4.0", reason = "use lines()/Lines instead now")]
1506 #[derive(Clone, Debug)]
1507 #[allow(deprecated)]
// Deprecated thin wrapper: its only field is a `Lines` iterator.
1508 pub struct LinesAny<'a>(Lines<'a>);
1511 /// A nameable, cloneable fn type
// Maps a line to itself minus one trailing `\r`, when the line ends with one.
// NOTE(review): `l` is bound on a line not shown here, presumably `line.len()`.
1513 struct LinesAnyMap impl<'a> Fn = |line: &'a str| -> &'a str {
1515 if l > 0 && line.as_bytes()[l - 1] == b'\r' { &line[0 .. l - 1] }
// Forward iteration for the deprecated `LinesAny` wrapper; items are `&'a str`.
// NOTE(review): bodies not visible here; presumably they forward to the
// wrapped `Lines` -- confirm against the full source.
1520 #[stable(feature = "rust1", since = "1.0.0")]
1521 #[allow(deprecated)]
1522 impl<'a> Iterator for LinesAny<'a> {
1523 type Item = &'a str;
1526 fn next(&mut self) -> Option<&'a str> {
1531 fn size_hint(&self) -> (usize, Option<usize>) {
// Backward iteration for the deprecated `LinesAny` wrapper.
1536 #[stable(feature = "rust1", since = "1.0.0")]
1537 #[allow(deprecated)]
1538 impl<'a> DoubleEndedIterator for LinesAny<'a> {
1540 fn next_back(&mut self) -> Option<&'a str> {
// Marker impl: declares `LinesAny` as a fused iterator.
1545 #[stable(feature = "fused", since = "1.26.0")]
1546 #[allow(deprecated)]
1547 impl FusedIterator for LinesAny<'_> {}
1550 Section: UTF-8 validation
// Mask with the high bit of every byte set (`0x80` repeated).
1553 // use truncation to fit u64 into usize: the `as usize` cast drops the upper
// half of the literal on targets where `usize` is narrower than 64 bits.
1554 const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;
1556 /// Returns `true` if any byte in the word `x` is nonascii (>= 128).
1558 fn contains_nonascii(x: usize) -> bool {
// A byte is >= 128 exactly when its top bit is set, so a single AND against
// the mask tests every byte of the word at once.
1559 (x & NONASCII_MASK) != 0
1562 /// Walks through `v` checking that it's a valid UTF-8 sequence,
1563 /// returning `Ok(())` in that case, or, if it is invalid, `Err(err)`.
1565 fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
1569 let usize_bytes = mem::size_of::<usize>();
// The ASCII fast path below inspects two machine words per step.
1570 let ascii_block_size = 2 * usize_bytes;
// One past the last index at which a full two-word block still fits in `v`;
// 0 when the input is shorter than one block, which disables the fast path.
1571 let blocks_end = if len >= ascii_block_size { len - ascii_block_size + 1 } else { 0 };
// Offset from the start of `v` to the first `usize`-aligned address.
1572 let align = v.as_ptr().align_offset(usize_bytes);
// Remember where the current sequence starts so that an error can report how
// many leading bytes were valid.
1575 let old_offset = index;
1577 ($error_len: expr) => {
1578 return Err(Utf8Error { valid_up_to: old_offset, error_len: $error_len });
1585 // we needed data, but there was none: error!
1593 let first = v[index];
// Expected sequence length, looked up from the first byte.
1595 let w = UTF8_CHAR_WIDTH[first as usize];
1596 // 2-byte encoding is for codepoints \u{0080} to \u{07ff}
1597 // first C2 80 last DF BF
1598 // 3-byte encoding is for codepoints \u{0800} to \u{ffff}
1599 // first E0 A0 80 last EF BF BF
1600 // excluding surrogates codepoints \u{d800} to \u{dfff}
1601 // ED A0 80 to ED BF BF
1602 // 4-byte encoding is for codepoints \u{10000} to \u{10ffff}
1603 // first F0 90 80 80 last F4 8F BF BF
1605 // Use the UTF-8 syntax from the RFC
1607 // https://tools.ietf.org/html/rfc3629
1609 // UTF8-2 = %xC2-DF UTF8-tail
1610 // UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
1611 // %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
1612 // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
1613 // %xF4 %x80-8F 2( UTF8-tail )
// A tail byte must have the shape `10xx_xxxx`: masking off the six value bits
// has to leave exactly the continuation tag.
1616 if next!() & !CONT_MASK != TAG_CONT_U8 {
// 3-byte sequences: the allowed range of the second byte depends on the first
// byte, following the UTF8-3 rule quoted above.
1621 match (first, next!()) {
1623 | (0xE1..=0xEC, 0x80..=0xBF)
// `ED` may only be followed by 80..=9F; A0..=BF would encode a surrogate.
1624 | (0xED, 0x80..=0x9F)
1625 | (0xEE..=0xEF, 0x80..=0xBF) => {}
1628 if next!() & !CONT_MASK != TAG_CONT_U8 {
// 4-byte sequences: second-byte ranges follow the UTF8-4 rule quoted above,
// keeping the encoded value within F0 90 80 80 ..= F4 8F BF BF.
1633 match (first, next!()) {
1634 (0xF0, 0x90..=0xBF) | (0xF1..=0xF3, 0x80..=0xBF) | (0xF4, 0x80..=0x8F) => {}
1637 if next!() & !CONT_MASK != TAG_CONT_U8 {
1640 if next!() & !CONT_MASK != TAG_CONT_U8 {
1648 // Ascii case, try to skip forward quickly.
1649 // When the pointer is aligned, read 2 words of data per iteration
1650 // until we find a word containing a non-ascii byte.
// `align_offset` reports `usize::MAX` when it cannot provide an offset (see
// its documentation); the second test checks that `index` sits on a word
// boundary relative to the first aligned address.
1651 if align != usize::MAX && align.wrapping_sub(index) % usize_bytes == 0 {
1652 let ptr = v.as_ptr();
1653 while index < blocks_end {
1654 // SAFETY: since `align - index` and `ascii_block_size` are
1655 // multiples of `usize_bytes`, `block = ptr.add(index)` is
1656 // always aligned with a `usize` so it's safe to dereference
1657 // both `block` and `block.offset(1)`.
1659 let block = ptr.add(index) as *const usize;
1660 // break if there is a nonascii byte
1661 let zu = contains_nonascii(*block);
1662 let zv = contains_nonascii(*block.offset(1));
1667 index += ascii_block_size;
1669 // step from the point where the wordwise loop stopped
1670 while index < len && v[index] < 128 {
// Length in bytes of the UTF-8 sequence introduced by each possible first
// byte. NOTE(review): a width of 0 presumably marks bytes that cannot start a
// sequence -- confirm against the validation code that consumes this table.
1682 // https://tools.ietf.org/html/rfc3629
1683 static UTF8_CHAR_WIDTH: [u8; 256] = [
1684 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1686 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1688 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1690 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1692 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1694 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1696 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1698 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xEF
1699 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xFF
1702 /// Given a first byte, determines how many bytes are in this UTF-8 character.
1703 #[unstable(feature = "str_internals", issue = "none")]
1705 pub fn utf8_char_width(b: u8) -> usize {
// Plain table lookup; a `u8` index is always in range for the 256-entry table.
1706 UTF8_CHAR_WIDTH[b as usize] as usize
// Used together: `byte & !CONT_MASK == TAG_CONT_U8` holds exactly for bytes
// of the form `10xx_xxxx`.
1709 /// Mask of the value bits of a continuation byte.
1710 const CONT_MASK: u8 = 0b0011_1111;
1711 /// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte.
1712 const TAG_CONT_U8: u8 = 0b1000_0000;
1715 Section: Trait implementations
1719 use crate::cmp::Ordering;
1722 use crate::slice::SliceIndex;
1724 /// Implements ordering of strings.
1726 /// Strings are ordered lexicographically by their byte values. This orders Unicode code
1727 /// points based on their positions in the code charts. This is not necessarily the same as
1728 /// "alphabetical" order, which varies by language and locale. Sorting strings according to
1729 /// culturally-accepted standards requires locale-specific data that is outside the scope of
1731 #[stable(feature = "rust1", since = "1.0.0")]
1734 fn cmp(&self, other: &str) -> Ordering {
// Compare the raw UTF-8 bytes; this is the byte-wise order described above.
1735 self.as_bytes().cmp(other.as_bytes())
// String equality is equality of the underlying byte slices.
1739 #[stable(feature = "rust1", since = "1.0.0")]
1740 impl PartialEq for str {
1742 fn eq(&self, other: &str) -> bool {
1743 self.as_bytes() == other.as_bytes()
// `ne` is spelled out explicitly instead of relying on the trait default.
1746 fn ne(&self, other: &str) -> bool {
1751 #[stable(feature = "rust1", since = "1.0.0")]
1754 /// Implements comparison operations on strings.
1756 /// Strings are compared lexicographically by their byte values. This compares Unicode code
1757 /// points based on their positions in the code charts. This is not necessarily the same as
1758 /// "alphabetical" order, which varies by language and locale. Comparing strings according to
1759 /// culturally-accepted standards requires locale-specific data that is outside the scope of
1761 #[stable(feature = "rust1", since = "1.0.0")]
1762 impl PartialOrd for str {
1764 fn partial_cmp(&self, other: &str) -> Option<Ordering> {
// Always `Some`: the result comes straight from the total order in `cmp`.
1765 Some(self.cmp(other))
// `str[index]` for any index type `I`; the result type is whatever `I`
// declares as its output.
// NOTE(review): the bound on `I` and the method body are not visible here.
1769 #[stable(feature = "rust1", since = "1.0.0")]
1770 impl<I> ops::Index<I> for str
1774 type Output = I::Output;
1777 fn index(&self, index: I) -> &I::Output {
// Mutable `str[index]`: the work is delegated to the index value itself.
1782 #[stable(feature = "rust1", since = "1.0.0")]
1783 impl<I> ops::IndexMut<I> for str
1788 fn index_mut(&mut self, index: I) -> &mut I::Output {
1789 index.index_mut(self)
// Panic helper for the inclusive-range index impls: called when the inclusive
// end is `usize::MAX`, so `end + 1` cannot be represented. Never returns.
1796 fn str_index_overflow_fail() -> ! {
1797 panic!("attempted to index str up to maximum usize");
1800 /// Implements substring slicing with syntax `&self[..]` or `&mut self[..]`.
1802 /// Returns a slice of the whole string, i.e., returns `&self` or `&mut
1803 /// self`. Equivalent to `&self[0 .. len]` or `&mut self[0 .. len]`. Unlike
1804 /// other indexing operations, this can never panic.
1806 /// This operation is `O(1)`.
1808 /// Prior to 1.20.0, these indexing operations were still supported by
1809 /// direct implementation of `Index` and `IndexMut`.
1811 /// Equivalent to `&self[0 .. len]` or `&mut self[0 .. len]`.
// Indexing with `..`.
// NOTE(review): the method bodies are not visible here; per the doc comment
// above they hand back the whole string and cannot fail.
1812 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
1813 unsafe impl SliceIndex<str> for ops::RangeFull {
1816 fn get(self, slice: &str) -> Option<&Self::Output> {
1820 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
1824 unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output {
1828 unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut Self::Output {
1832 fn index(self, slice: &str) -> &Self::Output {
1836 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
1841 /// Implements substring slicing with syntax `&self[begin .. end]` or `&mut
1842 /// self[begin .. end]`.
1844 /// Returns a slice of the given string from the byte range
1845 /// [`begin`, `end`).
1847 /// This operation is `O(1)`.
1849 /// Prior to 1.20.0, these indexing operations were still supported by
1850 /// direct implementation of `Index` and `IndexMut`.
1854 /// Panics if `begin` or `end` does not point to the starting byte offset of
1855 /// a character (as defined by `is_char_boundary`), if `begin > end`, or if
1861 /// let s = "Löwe 老虎 Léopard";
1862 /// assert_eq!(&s[0 .. 1], "L");
1864 /// assert_eq!(&s[1 .. 9], "öwe 老");
1866 /// // these will panic:
1867 /// // byte 2 lies within `ö`:
1870 /// // byte 8 lies within `老`
1873 /// // byte 100 is outside the string
1874 /// // &s[3 .. 100];
1876 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
1877 unsafe impl SliceIndex<str> for ops::Range<usize> {
// Checked access: succeeds only when `start <= end` and both ends are char
// boundaries (a test that also rejects indices past the end of the string).
1880 fn get(self, slice: &str) -> Option<&Self::Output> {
1881 if self.start <= self.end
1882 && slice.is_char_boundary(self.start)
1883 && slice.is_char_boundary(self.end)
1885 // SAFETY: just checked that `start` and `end` are on a char boundary,
1886 // and we are passing in a safe reference, so the return value will also be one.
1887 // We also checked char boundaries, so this is valid UTF-8.
1888 Some(unsafe { &*self.get_unchecked(slice) })
// Mutable counterpart of `get`, with the same three checks.
1894 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
1895 if self.start <= self.end
1896 && slice.is_char_boundary(self.start)
1897 && slice.is_char_boundary(self.end)
1899 // SAFETY: just checked that `start` and `end` are on a char boundary.
1900 // We know the pointer is unique because we got it from `slice`.
1901 Some(unsafe { &mut *self.get_unchecked_mut(slice) })
// Unchecked access via raw pointers: view the string as bytes, advance the
// data pointer by `start`, and build a new `str` pointer of `end - start` bytes.
1907 unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output {
1908 let slice = slice as *const [u8];
1909 // SAFETY: the caller guarantees that `self` is in bounds of `slice`
1910 // which satisfies all the conditions for `add`.
1911 let ptr = unsafe { slice.as_ptr().add(self.start) };
1912 let len = self.end - self.start;
1913 ptr::slice_from_raw_parts(ptr, len) as *const str
1916 unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut Self::Output {
1917 let slice = slice as *mut [u8];
1918 // SAFETY: see comments for `get_unchecked`.
1919 let ptr = unsafe { slice.as_mut_ptr().add(self.start) };
1920 let len = self.end - self.start;
1921 ptr::slice_from_raw_parts_mut(ptr, len) as *mut str
// Panicking access: copy the bounds out first, because `get` consumes `self`
// and the failure path still needs them for its message.
1924 fn index(self, slice: &str) -> &Self::Output {
1925 let (start, end) = (self.start, self.end);
1926 self.get(slice).unwrap_or_else(|| super::slice_error_fail(slice, start, end))
1929 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
1930 // is_char_boundary checks that the index is in [0, .len()]
1931 // cannot reuse `get` as above, because of NLL trouble
1932 if self.start <= self.end
1933 && slice.is_char_boundary(self.start)
1934 && slice.is_char_boundary(self.end)
1936 // SAFETY: just checked that `start` and `end` are on a char boundary,
1937 // and we are passing in a safe reference, so the return value will also be one.
1938 unsafe { &mut *self.get_unchecked_mut(slice) }
1940 super::slice_error_fail(slice, self.start, self.end)
1945 /// Implements substring slicing with syntax `&self[.. end]` or `&mut
1948 /// Returns a slice of the given string from the byte range [`0`, `end`).
1949 /// Equivalent to `&self[0 .. end]` or `&mut self[0 .. end]`.
1951 /// This operation is `O(1)`.
1953 /// Prior to 1.20.0, these indexing operations were still supported by
1954 /// direct implementation of `Index` and `IndexMut`.
1958 /// Panics if `end` does not point to the starting byte offset of a
1959 /// character (as defined by `is_char_boundary`), or if `end > len`.
1960 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
1961 unsafe impl SliceIndex<str> for ops::RangeTo<usize> {
// Checked access: the only requirement is that `end` is a char boundary.
1964 fn get(self, slice: &str) -> Option<&Self::Output> {
1965 if slice.is_char_boundary(self.end) {
1966 // SAFETY: just checked that `end` is on a char boundary,
1967 // and we are passing in a safe reference, so the return value will also be one.
1968 Some(unsafe { &*self.get_unchecked(slice) })
1974 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
1975 if slice.is_char_boundary(self.end) {
1976 // SAFETY: just checked that `end` is on a char boundary,
1977 // and we are passing in a safe reference, so the return value will also be one.
1978 Some(unsafe { &mut *self.get_unchecked_mut(slice) })
// Unchecked access: same data pointer as the original string, with the length
// replaced by `end`; no pointer offset is involved.
1984 unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output {
1985 let slice = slice as *const [u8];
1986 let ptr = slice.as_ptr();
1987 ptr::slice_from_raw_parts(ptr, self.end) as *const str
1990 unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut Self::Output {
1991 let slice = slice as *mut [u8];
1992 let ptr = slice.as_mut_ptr();
1993 ptr::slice_from_raw_parts_mut(ptr, self.end) as *mut str
// Panicking access: failures are reported as the range `0..end`.
1996 fn index(self, slice: &str) -> &Self::Output {
1998 self.get(slice).unwrap_or_else(|| super::slice_error_fail(slice, 0, end))
2001 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
2002 if slice.is_char_boundary(self.end) {
2003 // SAFETY: just checked that `end` is on a char boundary,
2004 // and we are passing in a safe reference, so the return value will also be one.
2005 unsafe { &mut *self.get_unchecked_mut(slice) }
2007 super::slice_error_fail(slice, 0, self.end)
2012 /// Implements substring slicing with syntax `&self[begin ..]` or `&mut
2013 /// self[begin ..]`.
2015 /// Returns a slice of the given string from the byte range [`begin`,
2016 /// `len`). Equivalent to `&self[begin .. len]` or `&mut self[begin ..
2019 /// This operation is `O(1)`.
2021 /// Prior to 1.20.0, these indexing operations were still supported by
2022 /// direct implementation of `Index` and `IndexMut`.
2026 /// Panics if `begin` does not point to the starting byte offset of
2027 /// a character (as defined by `is_char_boundary`), or if `begin > len`.
2028 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
2029 unsafe impl SliceIndex<str> for ops::RangeFrom<usize> {
// Checked access: the only requirement is that `start` is a char boundary.
// `start == len` passes that test, so slicing from the very end is allowed.
2032 fn get(self, slice: &str) -> Option<&Self::Output> {
2033 if slice.is_char_boundary(self.start) {
2034 // SAFETY: just checked that `start` is on a char boundary,
2035 // and we are passing in a safe reference, so the return value will also be one.
2036 Some(unsafe { &*self.get_unchecked(slice) })
2042 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
2043 if slice.is_char_boundary(self.start) {
2044 // SAFETY: just checked that `start` is on a char boundary,
2045 // and we are passing in a safe reference, so the return value will also be one.
2046 Some(unsafe { &mut *self.get_unchecked_mut(slice) })
// Unchecked access: advance the data pointer by `start` and keep the
// remaining `len - start` bytes.
2052 unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output {
2053 let slice = slice as *const [u8];
2054 // SAFETY: the caller guarantees that `self` is in bounds of `slice`
2055 // which satisfies all the conditions for `add`.
2056 let ptr = unsafe { slice.as_ptr().add(self.start) };
2057 let len = slice.len() - self.start;
2058 ptr::slice_from_raw_parts(ptr, len) as *const str
2061 unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut Self::Output {
2062 let slice = slice as *mut [u8];
2063 // SAFETY: identical to `get_unchecked`.
2064 let ptr = unsafe { slice.as_mut_ptr().add(self.start) };
2065 let len = slice.len() - self.start;
2066 ptr::slice_from_raw_parts_mut(ptr, len) as *mut str
// Panicking access: failures are reported as the range `start..len`.
2069 fn index(self, slice: &str) -> &Self::Output {
2070 let (start, end) = (self.start, slice.len());
2071 self.get(slice).unwrap_or_else(|| super::slice_error_fail(slice, start, end))
2074 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
2075 if slice.is_char_boundary(self.start) {
2076 // SAFETY: just checked that `start` is on a char boundary,
2077 // and we are passing in a safe reference, so the return value will also be one.
2078 unsafe { &mut *self.get_unchecked_mut(slice) }
2080 super::slice_error_fail(slice, self.start, slice.len())
2085 /// Implements substring slicing with syntax `&self[begin ..= end]` or `&mut
2086 /// self[begin ..= end]`.
2088 /// Returns a slice of the given string from the byte range
2089 /// [`begin`, `end`]. Equivalent to `&self [begin .. end + 1]` or `&mut
2090 /// self[begin .. end + 1]`, except if `end` has the maximum value for
2093 /// This operation is `O(1)`.
2097 /// Panics if `begin` does not point to the starting byte offset of
2098 /// a character (as defined by `is_char_boundary`), if `end` does not point
2099 /// to the ending byte offset of a character (`end + 1` is either a starting
2100 /// byte offset or equal to `len`), if `begin > end`, or if `end >= len`.
// Every method rewrites `start..=end` as the half-open range `start..end + 1`
// and reuses the `Range<usize>` impl. `end + 1` cannot be represented when
// `end == usize::MAX`, so the checked and panicking methods test for that
// case first.
2101 #[stable(feature = "inclusive_range", since = "1.26.0")]
2102 unsafe impl SliceIndex<str> for ops::RangeInclusive<usize> {
2105 fn get(self, slice: &str) -> Option<&Self::Output> {
2106 if *self.end() == usize::MAX {
2109 (*self.start()..self.end() + 1).get(slice)
2113 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
2114 if *self.end() == usize::MAX {
2117 (*self.start()..self.end() + 1).get_mut(slice)
// The unchecked variants do no overflow test of their own.
2121 unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output {
2122 // SAFETY: the caller must uphold the safety contract for `get_unchecked`.
2123 unsafe { (*self.start()..self.end() + 1).get_unchecked(slice) }
2126 unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut Self::Output {
2127 // SAFETY: the caller must uphold the safety contract for `get_unchecked_mut`.
2128 unsafe { (*self.start()..self.end() + 1).get_unchecked_mut(slice) }
// Panicking variants: an end of `usize::MAX` gets its own panic message.
2131 fn index(self, slice: &str) -> &Self::Output {
2132 if *self.end() == usize::MAX {
2133 str_index_overflow_fail();
2135 (*self.start()..self.end() + 1).index(slice)
2138 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
2139 if *self.end() == usize::MAX {
2140 str_index_overflow_fail();
2142 (*self.start()..self.end() + 1).index_mut(slice)
2146 /// Implements substring slicing with syntax `&self[..= end]` or `&mut
2149 /// Returns a slice of the given string from the byte range [0, `end`].
2150 /// Equivalent to `&self [0 .. end + 1]`, except if `end` has the maximum
2151 /// value for `usize`.
2153 /// This operation is `O(1)`.
2157 /// Panics if `end` does not point to the ending byte offset of a character
2158 /// (`end + 1` is either a starting byte offset as defined by
2159 /// `is_char_boundary`, or equal to `len`), or if `end >= len`.
// Every method rewrites `..=end` as `..end + 1` and reuses the `RangeTo`
// impl. `end == usize::MAX` is special-cased because `end + 1` cannot be
// represented: `None` from the checked methods, a panic from the indexing ones.
2160 #[stable(feature = "inclusive_range", since = "1.26.0")]
2161 unsafe impl SliceIndex<str> for ops::RangeToInclusive<usize> {
2164 fn get(self, slice: &str) -> Option<&Self::Output> {
2165 if self.end == usize::MAX { None } else { (..self.end + 1).get(slice) }
2168 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
2169 if self.end == usize::MAX { None } else { (..self.end + 1).get_mut(slice) }
// The unchecked variants do no overflow test of their own.
2172 unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output {
2173 // SAFETY: the caller must uphold the safety contract for `get_unchecked`.
2174 unsafe { (..self.end + 1).get_unchecked(slice) }
2177 unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut Self::Output {
2178 // SAFETY: the caller must uphold the safety contract for `get_unchecked_mut`.
2179 unsafe { (..self.end + 1).get_unchecked_mut(slice) }
2182 fn index(self, slice: &str) -> &Self::Output {
2183 if self.end == usize::MAX {
2184 str_index_overflow_fail();
2186 (..self.end + 1).index(slice)
2189 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
2190 if self.end == usize::MAX {
2191 str_index_overflow_fail();
2193 (..self.end + 1).index_mut(slice)
2198 // Truncates `s` to a length of at most `max` bytes.
2199 // Returns `true` if the string was truncated, together with the new str.
2200 fn truncate_to_char_boundary(s: &str, mut max: usize) -> (bool, &str) {
// Keep testing `max` until it lands on a char boundary, so the cut never
// splits a character.
2204 while !s.is_char_boundary(max) {
// Panic path for failed `str` slicing: works out which rule the range
// `begin..end` broke and panics with a message naming it. Never returns.
2214 fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
// Cap how much of the string is echoed into the panic message; `ellipsis`
// marks the output as shortened when truncation happened.
2215 const MAX_DISPLAY_LENGTH: usize = 256;
2216 let (truncated, s_trunc) = truncate_to_char_boundary(s, MAX_DISPLAY_LENGTH);
2217 let ellipsis = if truncated { "[...]" } else { "" };
// An index lies past the end of the string: report `begin` if it is the
// offender, otherwise `end`.
2220 if begin > s.len() || end > s.len() {
2221 let oob_index = if begin > s.len() { begin } else { end };
2222 panic!("byte index {} is out of bounds of `{}`{}", oob_index, s_trunc, ellipsis);
// The range must satisfy `begin <= end`.
2228 "begin <= end ({} <= {}) when slicing `{}`{}",
2235 // 3. character boundary
2236 let index = if !s.is_char_boundary(begin) { begin } else { end };
2237 // find the character
2238 let mut char_start = index;
2239 while !s.is_char_boundary(char_start) {
2242 // `char_start` must be less than len and a char boundary
2243 let ch = s[char_start..].chars().next().unwrap();
// Byte range occupied by the character that `index` falls inside.
2244 let char_range = char_start..char_start + ch.len_utf8();
2246 "byte index {} is not a char boundary; it is inside {:?} (bytes {:?}) of `{}`{}",
2247 index, ch, char_range, s_trunc, ellipsis
2254 /// Returns the length of `self`.
2256 /// This length is in bytes, not [`char`]s or graphemes. In other words,
2257 /// it may not be what a human considers the length of the string.
2264 /// let len = "foo".len();
2265 /// assert_eq!(3, len);
2267 /// assert_eq!("ƒoo".len(), 4); // fancy f!
2268 /// assert_eq!("ƒoo".chars().count(), 3);
2270 #[stable(feature = "rust1", since = "1.0.0")]
2271 #[rustc_const_stable(feature = "const_str_len", since = "1.32.0")]
2273 pub const fn len(&self) -> usize {
// The length is taken from the byte view, hence a count of bytes, not chars.
2274 self.as_bytes().len()
2277 /// Returns `true` if `self` has a length of zero bytes.
2285 /// assert!(s.is_empty());
2287 /// let s = "not empty";
2288 /// assert!(!s.is_empty());
// NOTE(review): body not visible here; per the doc comment above this reports
// whether the string has a length of zero bytes.
2291 #[stable(feature = "rust1", since = "1.0.0")]
2292 #[rustc_const_stable(feature = "const_str_is_empty", since = "1.32.0")]
2293 pub const fn is_empty(&self) -> bool {
2297 /// Checks that `index`-th byte is the first byte in a UTF-8 code point
2298 /// sequence or the end of the string.
2300 /// The start and end of the string (when `index == self.len()`) are
2301 /// considered to be boundaries.
2303 /// Returns `false` if `index` is greater than `self.len()`.
2308 /// let s = "Löwe 老虎 Léopard";
2309 /// assert!(s.is_char_boundary(0));
2311 /// assert!(s.is_char_boundary(6));
2312 /// assert!(s.is_char_boundary(s.len()));
2314 /// // second byte of `ö`
2315 /// assert!(!s.is_char_boundary(2));
2317 /// // third byte of `老`
2318 /// assert!(!s.is_char_boundary(8));
2320 #[stable(feature = "is_char_boundary", since = "1.9.0")]
2322 pub fn is_char_boundary(&self, index: usize) -> bool {
2323 // 0 and len are always ok.
2324 // Test for 0 explicitly so that it can optimize out the check
2325 // easily and skip reading string data for that case.
2326 if index == 0 || index == self.len() {
// Checked byte lookup: an `index` past the end produces no byte to inspect.
2329 match self.as_bytes().get(index) {
2331 // This is bit magic equivalent to: b < 128 || b >= 192
// Bytes 128..=191 (`10xx_xxxx`, i.e. continuation bytes) are exactly the
// values below -0x40 when reinterpreted as `i8`; every other byte starts
// a character.
2332 Some(&b) => (b as i8) >= -0x40,
2336 /// Converts a string slice to a byte slice. To convert the byte slice back
2337 /// into a string slice, use the [`from_utf8`] function.
2344 /// let bytes = "bors".as_bytes();
2345 /// assert_eq!(b"bors", bytes);
2347 #[stable(feature = "rust1", since = "1.0.0")]
2348 #[rustc_const_stable(feature = "str_as_bytes", since = "1.32.0")]
2350 #[allow(unused_attributes)]
2351 #[allow_internal_unstable(const_fn_union)]
2352 pub const fn as_bytes(&self) -> &[u8] {
// The value is written through the `str` field of `Slices` and read back
// through its `slice` field.
// NOTE(review): `Slices` is declared on lines not shown here, presumably a
// union of `&str` and `&[u8]` -- confirm.
2358 // SAFETY: const sound because we transmute two types with the same layout
2359 unsafe { Slices { str: self }.slice }
2362 /// Converts a mutable string slice to a mutable byte slice.
2366 /// The caller must ensure that the content of the slice is valid UTF-8
2367 /// before the borrow ends and the underlying `str` is used.
2369 /// Use of a `str` whose contents are not valid UTF-8 is undefined behavior.
2376 /// let mut s = String::from("Hello");
2377 /// let bytes = unsafe { s.as_bytes_mut() };
2379 /// assert_eq!(b"Hello", bytes);
2385 /// let mut s = String::from("🗻∈🌏");
2388 /// let bytes = s.as_bytes_mut();
2390 /// bytes[0] = 0xF0;
2391 /// bytes[1] = 0x9F;
2392 /// bytes[2] = 0x8D;
2393 /// bytes[3] = 0x94;
2396 /// assert_eq!("🍔∈🌏", s);
2398 #[stable(feature = "str_mut_extras", since = "1.20.0")]
2400 pub unsafe fn as_bytes_mut(&mut self) -> &mut [u8] {
2401 // SAFETY: the cast from `*mut str` to `*mut [u8]` is safe since `str`
2402 // has the same layout as `[u8]` (only libstd can make this guarantee).
2403 // The pointer dereference is safe since it comes from a mutable reference which
2404 // is guaranteed to be valid for writes.
2405 unsafe { &mut *(self as *mut str as *mut [u8]) }
2408 /// Converts a string slice to a raw pointer.
2410 /// As string slices are a slice of bytes, the raw pointer points to a
2411 /// [`u8`]. This pointer will be pointing to the first byte of the string
2414 /// The caller must ensure that the returned pointer is never written to.
2415 /// If you need to mutate the contents of the string slice, use [`as_mut_ptr`].
2417 /// [`as_mut_ptr`]: str::as_mut_ptr
2424 /// let s = "Hello";
2425 /// let ptr = s.as_ptr();
2427 #[stable(feature = "rust1", since = "1.0.0")]
2428 #[rustc_const_stable(feature = "rustc_str_as_ptr", since = "1.32.0")]
2430 pub const fn as_ptr(&self) -> *const u8 {
// Two-step cast: reference -> raw `str` pointer -> thin `u8` pointer, which
// drops the length and keeps only the data address.
2431 self as *const str as *const u8
2434 /// Converts a mutable string slice to a raw pointer.
2436 /// As string slices are a slice of bytes, the raw pointer points to a
2437 /// [`u8`]. This pointer will be pointing to the first byte of the string
2440 /// It is your responsibility to make sure that the string slice only gets
2441 /// modified in a way that it remains valid UTF-8.
2442 #[stable(feature = "str_as_mut_ptr", since = "1.36.0")]
2444 pub fn as_mut_ptr(&mut self) -> *mut u8 {
// Two-step cast: reference -> raw `str` pointer -> thin `u8` pointer, which
// drops the length and keeps only the data address.
2445 self as *mut str as *mut u8
2448 /// Returns a subslice of `str`.
2450 /// This is the non-panicking alternative to indexing the `str`. Returns
2451 /// [`None`] whenever equivalent indexing operation would panic.
2456 /// let v = String::from("🗻∈🌏");
2458 /// assert_eq!(Some("🗻"), v.get(0..4));
2460 /// // indices not on UTF-8 sequence boundaries
2461 /// assert!(v.get(1..).is_none());
2462 /// assert!(v.get(..8).is_none());
2464 /// // out of bounds
2465 /// assert!(v.get(..42).is_none());
// Generic over every index type that implements `SliceIndex<str>`.
// NOTE(review): body not visible here; presumably it forwards to the index
// type's own `get` -- confirm against the full source.
2467 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
2469 pub fn get<I: SliceIndex<str>>(&self, i: I) -> Option<&I::Output> {
2473 /// Returns a mutable subslice of `str`.
2475 /// This is the non-panicking alternative to indexing the `str`. Returns
2476 /// [`None`] whenever equivalent indexing operation would panic.
2481 /// let mut v = String::from("hello");
2482 /// // correct length
2483 /// assert!(v.get_mut(0..5).is_some());
2484 /// // out of bounds
2485 /// assert!(v.get_mut(..42).is_none());
2486 /// assert_eq!(Some("he"), v.get_mut(0..2).map(|v| &*v));
2488 /// assert_eq!("hello", v);
2490 /// let s = v.get_mut(0..2);
2491 /// let s = s.map(|s| {
2492 /// s.make_ascii_uppercase();
2495 /// assert_eq!(Some("HE"), s);
2497 /// assert_eq!("HEllo", v);
// Generic over every index type that implements `SliceIndex<str>`.
// NOTE(review): body not visible here; presumably it forwards to the index
// type's own `get_mut` -- confirm against the full source.
2499 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
2501 pub fn get_mut<I: SliceIndex<str>>(&mut self, i: I) -> Option<&mut I::Output> {
2505 /// Returns an unchecked subslice of `str`.
2507 /// This is the unchecked alternative to indexing the `str`.
2511 /// Callers of this function are responsible that these preconditions are
2514 /// * The starting index must not exceed the ending index;
2515 /// * Indexes must be within bounds of the original slice;
2516 /// * Indexes must lie on UTF-8 sequence boundaries.
2518 /// Failing that, the returned string slice may reference invalid memory or
2519 /// violate the invariants communicated by the `str` type.
2526 /// assert_eq!("🗻", v.get_unchecked(0..4));
2527 /// assert_eq!("∈", v.get_unchecked(4..7));
2528 /// assert_eq!("🌏", v.get_unchecked(7..11));
2531 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
2533 pub unsafe fn get_unchecked<I: SliceIndex<str>>(&self, i: I) -> &I::Output {
2534 // SAFETY: the caller must uphold the safety contract for `get_unchecked`;
2535 // the slice is dereferenceable because `self` is a safe reference.
2536 // The returned pointer is safe because impls of `SliceIndex` have to guarantee that it is.
// The index type produces a raw pointer to the sub-slice, which is then
// turned back into a shared reference.
2537 unsafe { &*i.get_unchecked(self) }
2540 /// Returns a mutable, unchecked subslice of `str`.
2542 /// This is the unchecked alternative to indexing the `str`.
2546 /// Callers of this function are responsible that these preconditions are
2549 /// * The starting index must not exceed the ending index;
2550 /// * Indexes must be within bounds of the original slice;
2551 /// * Indexes must lie on UTF-8 sequence boundaries.
2553 /// Failing that, the returned string slice may reference invalid memory or
2554 /// violate the invariants communicated by the `str` type.
2559 /// let mut v = String::from("🗻∈🌏");
2561 /// assert_eq!("🗻", v.get_unchecked_mut(0..4));
2562 /// assert_eq!("∈", v.get_unchecked_mut(4..7));
2563 /// assert_eq!("🌏", v.get_unchecked_mut(7..11));
2566 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
2568 pub unsafe fn get_unchecked_mut<I: SliceIndex<str>>(&mut self, i: I) -> &mut I::Output {
// The actual indexing is delegated to the `SliceIndex` impl of `I`; the
// pointer it returns is reborrowed as a unique reference below.
2569 // SAFETY: the caller must uphold the safety contract for `get_unchecked_mut`;
2570 // the slice is dereferenceable because `self` is a safe reference.
2571 // The returned pointer is safe because impls of `SliceIndex` have to guarantee that it is.
2572 unsafe { &mut *i.get_unchecked_mut(self) }
2575 /// Creates a string slice from another string slice, bypassing safety checks.
2578 /// This is generally not recommended, use with caution! For a safe
2579 /// alternative see [`str`] and [`Index`].
2581 /// [`Index`]: crate::ops::Index
2583 /// This new slice goes from `begin` to `end`, including `begin` but
2584 /// excluding `end`.
2586 /// To get a mutable string slice instead, see the
2587 /// [`slice_mut_unchecked`] method.
2589 /// [`slice_mut_unchecked`]: str::slice_mut_unchecked
2593 /// Callers of this function are responsible that three preconditions are satisfied:
2596 /// * `begin` must not exceed `end`.
2597 /// * `begin` and `end` must be byte positions within the string slice.
2598 /// * `begin` and `end` must lie on UTF-8 sequence boundaries.
2605 /// let s = "Löwe 老虎 Léopard";
2608 /// assert_eq!("Löwe 老虎 Léopard", s.slice_unchecked(0, 21));
2611 /// let s = "Hello, world!";
2614 /// assert_eq!("world", s.slice_unchecked(7, 12));
2617 #[stable(feature = "rust1", since = "1.0.0")]
2618 #[rustc_deprecated(since = "1.29.0", reason = "use `get_unchecked(begin..end)` instead")]
2620 pub unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str {
// Implemented as unchecked indexing with the range `begin..end`, i.e. the
// same operation the deprecation note points callers to.
2621 // SAFETY: the caller must uphold the safety contract for `get_unchecked`;
2622 // the slice is dereferenceable because `self` is a safe reference.
2623 // The returned pointer is safe because impls of `SliceIndex` have to guarantee that it is.
2624 unsafe { &*(begin..end).get_unchecked(self) }
2627 /// Creates a string slice from another string slice, bypassing safety checks.
2629 /// This is generally not recommended, use with caution! For a safe
2630 /// alternative see [`str`] and [`IndexMut`].
2632 /// [`IndexMut`]: crate::ops::IndexMut
2634 /// This new slice goes from `begin` to `end`, including `begin` but
2635 /// excluding `end`.
2637 /// To get an immutable string slice instead, see the
2638 /// [`slice_unchecked`] method.
2640 /// [`slice_unchecked`]: str::slice_unchecked
2644 /// Callers of this function are responsible that three preconditions are satisfied:
2647 /// * `begin` must not exceed `end`.
2648 /// * `begin` and `end` must be byte positions within the string slice.
2649 /// * `begin` and `end` must lie on UTF-8 sequence boundaries.
2650 #[stable(feature = "str_slice_mut", since = "1.5.0")]
2651 #[rustc_deprecated(since = "1.29.0", reason = "use `get_unchecked_mut(begin..end)` instead")]
2653 pub unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str {
// Implemented as unchecked mutable indexing with the range `begin..end`,
// i.e. the same operation the deprecation note points callers to.
2654 // SAFETY: the caller must uphold the safety contract for `get_unchecked_mut`;
2655 // the slice is dereferenceable because `self` is a safe reference.
2656 // The returned pointer is safe because impls of `SliceIndex` have to guarantee that it is.
2657 unsafe { &mut *(begin..end).get_unchecked_mut(self) }
2660 /// Divide one string slice into two at an index.
2662 /// The argument, `mid`, should be a byte offset from the start of the
2663 /// string. It must also be on the boundary of a UTF-8 code point.
2665 /// The two slices returned go from the start of the string slice to `mid`,
2666 /// and from `mid` to the end of the string slice.
2668 /// To get mutable string slices instead, see the [`split_at_mut`] method.
2671 /// [`split_at_mut`]: str::split_at_mut
2675 /// Panics if `mid` is not on a UTF-8 code point boundary, or if it is
2676 /// past the end of the last code point of the string slice.
2683 /// let s = "Per Martin-Löf";
2685 /// let (first, last) = s.split_at(3);
2687 /// assert_eq!("Per", first);
2688 /// assert_eq!(" Martin-Löf", last);
2691 #[stable(feature = "str_split_at", since = "1.4.0")]
2692 pub fn split_at(&self, mid: usize) -> (&str, &str) {
2693 // is_char_boundary checks that the index is in [0, .len()]
2694 if self.is_char_boundary(mid) {
2695 // SAFETY: just checked that `mid` is on a char boundary, which (see the
// comment above) also implies `mid <= self.len()`, so both `0..mid` and
// `mid..self.len()` are in bounds and start/end on char boundaries.
2696 unsafe { (self.get_unchecked(0..mid), self.get_unchecked(mid..self.len())) }
// Invalid `mid`: hand off to `slice_error_fail` (the documented panic path),
// describing the attempted range as starting at 0 and ending at `mid`.
2698 slice_error_fail(self, 0, mid)
2702 /// Divide one mutable string slice into two at an index.
2704 /// The argument, `mid`, should be a byte offset from the start of the
2705 /// string. It must also be on the boundary of a UTF-8 code point.
2707 /// The two slices returned go from the start of the string slice to `mid`,
2708 /// and from `mid` to the end of the string slice.
2710 /// To get immutable string slices instead, see the [`split_at`] method.
2712 /// [`split_at`]: str::split_at
2716 /// Panics if `mid` is not on a UTF-8 code point boundary, or if it is
2717 /// past the end of the last code point of the string slice.
2724 /// let mut s = "Per Martin-Löf".to_string();
2726 /// let (first, last) = s.split_at_mut(3);
2727 /// first.make_ascii_uppercase();
2728 /// assert_eq!("PER", first);
2729 /// assert_eq!(" Martin-Löf", last);
2731 /// assert_eq!("PER Martin-Löf", s);
2734 #[stable(feature = "str_split_at", since = "1.4.0")]
2735 pub fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str) {
2736 // is_char_boundary checks that the index is in [0, .len()]
2737 if self.is_char_boundary(mid) {
// Capture the length and base pointer once; both halves are rebuilt from
// raw parts below.
2738 let len = self.len();
2739 let ptr = self.as_mut_ptr();
2740 // SAFETY: just checked that `mid` is on a char boundary.
// The first half covers bytes `[0, mid)` and the second `[mid, len)`, so
// the two mutable slices never overlap.
2743 from_utf8_unchecked_mut(slice::from_raw_parts_mut(ptr, mid)),
2744 from_utf8_unchecked_mut(slice::from_raw_parts_mut(ptr.add(mid), len - mid)),
// Invalid `mid`: hand off to `slice_error_fail` (the documented panic path),
// describing the attempted range as starting at 0 and ending at `mid`.
2748 slice_error_fail(self, 0, mid)
2752 /// Returns an iterator over the [`char`]s of a string slice.
2754 /// As a string slice consists of valid UTF-8, we can iterate through a
2755 /// string slice by [`char`]. This method returns such an iterator.
2757 /// It's important to remember that [`char`] represents a Unicode Scalar
2758 /// Value, and may not match your idea of what a 'character' is. Iteration
2759 /// over grapheme clusters may be what you actually want. This functionality
2760 /// is not provided by Rust's standard library, check crates.io instead.
2767 /// let word = "goodbye";
2769 /// let count = word.chars().count();
2770 /// assert_eq!(7, count);
2772 /// let mut chars = word.chars();
2774 /// assert_eq!(Some('g'), chars.next());
2775 /// assert_eq!(Some('o'), chars.next());
2776 /// assert_eq!(Some('o'), chars.next());
2777 /// assert_eq!(Some('d'), chars.next());
2778 /// assert_eq!(Some('b'), chars.next());
2779 /// assert_eq!(Some('y'), chars.next());
2780 /// assert_eq!(Some('e'), chars.next());
2782 /// assert_eq!(None, chars.next());
2785 /// Remember, [`char`]s may not match your human intuition about characters:
2790 /// let mut chars = y.chars();
2792 /// assert_eq!(Some('y'), chars.next()); // not 'y̆'
2793 /// assert_eq!(Some('\u{0306}'), chars.next());
2795 /// assert_eq!(None, chars.next());
2797 #[stable(feature = "rust1", since = "1.0.0")]
2799 pub fn chars(&self) -> Chars<'_> {
// `Chars` is built directly over an iterator of the string's bytes.
2800 Chars { iter: self.as_bytes().iter() }
2803 /// Returns an iterator over the [`char`]s of a string slice, and their positions.
2806 /// As a string slice consists of valid UTF-8, we can iterate through a
2807 /// string slice by [`char`]. This method returns an iterator of both
2808 /// these [`char`]s, as well as their byte positions.
2810 /// The iterator yields tuples. The position is first, the [`char`] is second.
2818 /// let word = "goodbye";
2820 /// let count = word.char_indices().count();
2821 /// assert_eq!(7, count);
2823 /// let mut char_indices = word.char_indices();
2825 /// assert_eq!(Some((0, 'g')), char_indices.next());
2826 /// assert_eq!(Some((1, 'o')), char_indices.next());
2827 /// assert_eq!(Some((2, 'o')), char_indices.next());
2828 /// assert_eq!(Some((3, 'd')), char_indices.next());
2829 /// assert_eq!(Some((4, 'b')), char_indices.next());
2830 /// assert_eq!(Some((5, 'y')), char_indices.next());
2831 /// assert_eq!(Some((6, 'e')), char_indices.next());
2833 /// assert_eq!(None, char_indices.next());
2836 /// Remember, [`char`]s may not match your human intuition about characters:
2839 /// let yes = "y̆es";
2841 /// let mut char_indices = yes.char_indices();
2843 /// assert_eq!(Some((0, 'y')), char_indices.next()); // not (0, 'y̆')
2844 /// assert_eq!(Some((1, '\u{0306}')), char_indices.next());
2846 /// // note the 3 here - the previous character took up two bytes
2847 /// assert_eq!(Some((3, 'e')), char_indices.next());
2848 /// assert_eq!(Some((4, 's')), char_indices.next());
2850 /// assert_eq!(None, char_indices.next());
2852 #[stable(feature = "rust1", since = "1.0.0")]
2854 pub fn char_indices(&self) -> CharIndices<'_> {
// Byte offsets start at 0; the characters themselves come from `chars()`.
2855 CharIndices { front_offset: 0, iter: self.chars() }
2858 /// An iterator over the bytes of a string slice.
2860 /// As a string slice consists of a sequence of bytes, we can iterate
2861 /// through a string slice by byte. This method returns such an iterator.
2868 /// let mut bytes = "bors".bytes();
2870 /// assert_eq!(Some(b'b'), bytes.next());
2871 /// assert_eq!(Some(b'o'), bytes.next());
2872 /// assert_eq!(Some(b'r'), bytes.next());
2873 /// assert_eq!(Some(b's'), bytes.next());
2875 /// assert_eq!(None, bytes.next());
2877 #[stable(feature = "rust1", since = "1.0.0")]
2879 pub fn bytes(&self) -> Bytes<'_> {
// `copied()` turns the `&u8` items of the byte-slice iterator into `u8` values.
2880 Bytes(self.as_bytes().iter().copied())
2883 /// Splits a string slice by whitespace.
2885 /// The iterator returned will return string slices that are sub-slices of
2886 /// the original string slice, separated by any amount of whitespace.
2888 /// 'Whitespace' is defined according to the terms of the Unicode Derived
2889 /// Core Property `White_Space`. If you only want to split on ASCII whitespace
2890 /// instead, use [`split_ascii_whitespace`].
2892 /// [`split_ascii_whitespace`]: str::split_ascii_whitespace
2899 /// let mut iter = "A few words".split_whitespace();
2901 /// assert_eq!(Some("A"), iter.next());
2902 /// assert_eq!(Some("few"), iter.next());
2903 /// assert_eq!(Some("words"), iter.next());
2905 /// assert_eq!(None, iter.next());
2908 /// All kinds of whitespace are considered:
2911 /// let mut iter = " Mary had\ta\u{2009}little \n\t lamb".split_whitespace();
2912 /// assert_eq!(Some("Mary"), iter.next());
2913 /// assert_eq!(Some("had"), iter.next());
2914 /// assert_eq!(Some("a"), iter.next());
2915 /// assert_eq!(Some("little"), iter.next());
2916 /// assert_eq!(Some("lamb"), iter.next());
2918 /// assert_eq!(None, iter.next());
2920 #[stable(feature = "split_whitespace", since = "1.1.0")]
2922 pub fn split_whitespace(&self) -> SplitWhitespace<'_> {
// Split on the `IsWhitespace` pattern, then pass the pieces through the
// `IsNotEmpty` filter so that runs of adjacent whitespace do not yield
// empty items.
2923 SplitWhitespace { inner: self.split(IsWhitespace).filter(IsNotEmpty) }
2926 /// Splits a string slice by ASCII whitespace.
2928 /// The iterator returned will return string slices that are sub-slices of
2929 /// the original string slice, separated by any amount of ASCII whitespace.
2931 /// To split by Unicode `Whitespace` instead, use [`split_whitespace`].
2933 /// [`split_whitespace`]: str::split_whitespace
2940 /// let mut iter = "A few words".split_ascii_whitespace();
2942 /// assert_eq!(Some("A"), iter.next());
2943 /// assert_eq!(Some("few"), iter.next());
2944 /// assert_eq!(Some("words"), iter.next());
2946 /// assert_eq!(None, iter.next());
2949 /// All kinds of ASCII whitespace are considered:
2952 /// let mut iter = " Mary had\ta little \n\t lamb".split_ascii_whitespace();
2953 /// assert_eq!(Some("Mary"), iter.next());
2954 /// assert_eq!(Some("had"), iter.next());
2955 /// assert_eq!(Some("a"), iter.next());
2956 /// assert_eq!(Some("little"), iter.next());
2957 /// assert_eq!(Some("lamb"), iter.next());
2959 /// assert_eq!(None, iter.next());
2961 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
2963 pub fn split_ascii_whitespace(&self) -> SplitAsciiWhitespace<'_> {
// Operates on the raw bytes: split with `IsAsciiWhitespace`, filter through
// `BytesIsNotEmpty`, then map each byte slice back to `&str` with
// `UnsafeBytesToStr`.
// NOTE(review): presumably sound because splitting at ASCII bytes cannot cut
// a multi-byte UTF-8 sequence - confirm against `UnsafeBytesToStr`.
2965 self.as_bytes().split(IsAsciiWhitespace).filter(BytesIsNotEmpty).map(UnsafeBytesToStr);
2966 SplitAsciiWhitespace { inner }
2969 /// An iterator over the lines of a string, as string slices.
2971 /// Lines are ended with either a newline (`\n`) or a carriage return with
2972 /// a line feed (`\r\n`).
2974 /// The final line ending is optional.
2981 /// let text = "foo\r\nbar\n\nbaz\n";
2982 /// let mut lines = text.lines();
2984 /// assert_eq!(Some("foo"), lines.next());
2985 /// assert_eq!(Some("bar"), lines.next());
2986 /// assert_eq!(Some(""), lines.next());
2987 /// assert_eq!(Some("baz"), lines.next());
2989 /// assert_eq!(None, lines.next());
2992 /// The final line ending isn't required:
2995 /// let text = "foo\nbar\n\r\nbaz";
2996 /// let mut lines = text.lines();
2998 /// assert_eq!(Some("foo"), lines.next());
2999 /// assert_eq!(Some("bar"), lines.next());
3000 /// assert_eq!(Some(""), lines.next());
3001 /// assert_eq!(Some("baz"), lines.next());
3003 /// assert_eq!(None, lines.next());
3005 #[stable(feature = "rust1", since = "1.0.0")]
3007 pub fn lines(&self) -> Lines<'_> {
// Split on `\n` using terminator semantics, then post-process every piece
// with `LinesAnyMap`.
// NOTE(review): `LinesAnyMap` presumably strips a trailing `\r` so that
// `\r\n` endings are handled - confirm against its definition.
3008 Lines(self.split_terminator('\n').map(LinesAnyMap))
3011 /// An iterator over the lines of a string.
3012 #[stable(feature = "rust1", since = "1.0.0")]
3013 #[rustc_deprecated(since = "1.4.0", reason = "use lines() instead now")]
3015 #[allow(deprecated)]
3016 pub fn lines_any(&self) -> LinesAny<'_> {
// Deprecated entry point: simply wraps the iterator returned by `lines()`.
3017 LinesAny(self.lines())
3020 /// Returns an iterator of `u16` over the string encoded as UTF-16.
3027 /// let text = "Zażółć gęślą jaźń";
3029 /// let utf8_len = text.len();
3030 /// let utf16_len = text.encode_utf16().count();
3032 /// assert!(utf16_len <= utf8_len);
3034 #[stable(feature = "encode_utf16", since = "1.8.0")]
3035 pub fn encode_utf16(&self) -> EncodeUtf16<'_> {
// The encoder is driven by `chars()`; `extra` starts out as 0.
// NOTE(review): `extra` presumably buffers the pending second code unit of a
// surrogate pair - confirm against the `EncodeUtf16` iterator impl.
3036 EncodeUtf16 { chars: self.chars(), extra: 0 }
3039 /// Returns `true` if the given pattern matches a sub-slice of
3040 /// this string slice.
3042 /// Returns `false` if it does not.
3044 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3045 /// function or closure that determines if a character matches.
3047 /// [pattern]: self::pattern
3054 /// let bananas = "bananas";
3056 /// assert!(bananas.contains("nana"));
3057 /// assert!(!bananas.contains("apples"));
3059 #[stable(feature = "rust1", since = "1.0.0")]
3061 pub fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
// The pattern decides how containment is tested; `self` is the haystack.
3062 pat.is_contained_in(self)
3065 /// Returns `true` if the given pattern matches a prefix of this string slice.
3068 /// Returns `false` if it does not.
3070 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3071 /// function or closure that determines if a character matches.
3073 /// [pattern]: self::pattern
3080 /// let bananas = "bananas";
3082 /// assert!(bananas.starts_with("bana"));
3083 /// assert!(!bananas.starts_with("nana"));
3085 #[stable(feature = "rust1", since = "1.0.0")]
3086 pub fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
// The pattern decides how the prefix test is done; `self` is the haystack.
3087 pat.is_prefix_of(self)
3090 /// Returns `true` if the given pattern matches a suffix of this string slice.
3093 /// Returns `false` if it does not.
3095 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3096 /// function or closure that determines if a character matches.
3098 /// [pattern]: self::pattern
3105 /// let bananas = "bananas";
3107 /// assert!(bananas.ends_with("anas"));
3108 /// assert!(!bananas.ends_with("nana"));
3110 #[stable(feature = "rust1", since = "1.0.0")]
3111 pub fn ends_with<'a, P>(&'a self, pat: P) -> bool
// The suffix test needs a pattern whose searcher can search in reverse.
3113 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
// The pattern decides how the suffix test is done; `self` is the haystack.
3115 pat.is_suffix_of(self)
3118 /// Returns the byte index of the first character of this string slice that
3119 /// matches the pattern.
3121 /// Returns [`None`] if the pattern doesn't match.
3123 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3124 /// function or closure that determines if a character matches.
3126 /// [pattern]: self::pattern
3130 /// Simple patterns:
3133 /// let s = "Löwe 老虎 Léopard Gepardi";
3135 /// assert_eq!(s.find('L'), Some(0));
3136 /// assert_eq!(s.find('é'), Some(14));
3137 /// assert_eq!(s.find("pard"), Some(17));
3140 /// More complex patterns using point-free style and closures:
3143 /// let s = "Löwe 老虎 Léopard";
3145 /// assert_eq!(s.find(char::is_whitespace), Some(5));
3146 /// assert_eq!(s.find(char::is_lowercase), Some(1));
3147 /// assert_eq!(s.find(|c: char| c.is_whitespace() || c.is_lowercase()), Some(1));
3148 /// assert_eq!(s.find(|c: char| (c < 'o') && (c > 'a')), Some(4));
3151 /// Not finding the pattern:
3154 /// let s = "Löwe 老虎 Léopard";
3155 /// let x: &[_] = &['1', '2'];
3157 /// assert_eq!(s.find(x), None);
3159 #[stable(feature = "rust1", since = "1.0.0")]
3161 pub fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
// Ask the searcher for its first match and keep only the first component of
// the returned pair (the byte index where the match starts).
3162 pat.into_searcher(self).next_match().map(|(i, _)| i)
3165 /// Returns the byte index for the first character of the rightmost match of the pattern in
3166 /// this string slice.
3168 /// Returns [`None`] if the pattern doesn't match.
3170 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3171 /// function or closure that determines if a character matches.
3173 /// [pattern]: self::pattern
3177 /// Simple patterns:
3180 /// let s = "Löwe 老虎 Léopard Gepardi";
3182 /// assert_eq!(s.rfind('L'), Some(13));
3183 /// assert_eq!(s.rfind('é'), Some(14));
3184 /// assert_eq!(s.rfind("pard"), Some(24));
3187 /// More complex patterns with closures:
3190 /// let s = "Löwe 老虎 Léopard";
3192 /// assert_eq!(s.rfind(char::is_whitespace), Some(12));
3193 /// assert_eq!(s.rfind(char::is_lowercase), Some(20));
3196 /// Not finding the pattern:
3199 /// let s = "Löwe 老虎 Léopard";
3200 /// let x: &[_] = &['1', '2'];
3202 /// assert_eq!(s.rfind(x), None);
3204 #[stable(feature = "rust1", since = "1.0.0")]
3206 pub fn rfind<'a, P>(&'a self, pat: P) -> Option<usize>
// Searching from the back needs a pattern whose searcher supports it.
3208 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
// Ask the searcher for its first match from the back and keep only the first
// component of the returned pair (the byte index where that match starts).
3210 pat.into_searcher(self).next_match_back().map(|(i, _)| i)
3213 /// An iterator over substrings of this string slice, separated by
3214 /// characters matched by a pattern.
3216 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3217 /// function or closure that determines if a character matches.
3219 /// [pattern]: self::pattern
3221 /// # Iterator behavior
3223 /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
3224 /// allows a reverse search and forward/reverse search yields the same
3225 /// elements. This is true for, e.g., [`char`], but not for `&str`.
3227 /// If the pattern allows a reverse search but its results might differ
3228 /// from a forward search, the [`rsplit`] method can be used.
3230 /// [`rsplit`]: str::rsplit
3234 /// Simple patterns:
3237 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect();
3238 /// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]);
3240 /// let v: Vec<&str> = "".split('X').collect();
3241 /// assert_eq!(v, [""]);
3243 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
3244 /// assert_eq!(v, ["lion", "", "tiger", "leopard"]);
3246 /// let v: Vec<&str> = "lion::tiger::leopard".split("::").collect();
3247 /// assert_eq!(v, ["lion", "tiger", "leopard"]);
3249 /// let v: Vec<&str> = "abc1def2ghi".split(char::is_numeric).collect();
3250 /// assert_eq!(v, ["abc", "def", "ghi"]);
3252 /// let v: Vec<&str> = "lionXtigerXleopard".split(char::is_uppercase).collect();
3253 /// assert_eq!(v, ["lion", "tiger", "leopard"]);
3256 /// A more complex pattern, using a closure:
3259 /// let v: Vec<&str> = "abc1defXghi".split(|c| c == '1' || c == 'X').collect();
3260 /// assert_eq!(v, ["abc", "def", "ghi"]);
3263 /// If a string contains multiple contiguous separators, you will end up
3264 /// with empty strings in the output:
3267 /// let x = "||||a||b|c".to_string();
3268 /// let d: Vec<_> = x.split('|').collect();
3270 /// assert_eq!(d, &["", "", "", "", "a", "", "b", "c"]);
3273 /// Contiguous separators are separated by the empty string.
3276 /// let x = "(///)".to_string();
3277 /// let d: Vec<_> = x.split('/').collect();
3279 /// assert_eq!(d, &["(", "", "", ")"]);
3282 /// Separators at the start or end of a string are neighbored
3283 /// by empty strings.
3286 /// let d: Vec<_> = "010".split("0").collect();
3287 /// assert_eq!(d, &["", "1", ""]);
3290 /// When the empty string is used as a separator, it separates
3291 /// every character in the string, along with the beginning
3292 /// and end of the string.
3295 /// let f: Vec<_> = "rust".split("").collect();
3296 /// assert_eq!(f, &["", "r", "u", "s", "t", ""]);
3299 /// Contiguous separators can lead to possibly surprising behavior
3300 /// when whitespace is used as the separator. This code is correct:
3303 /// let x = "    a  b c".to_string();
3304 /// let d: Vec<_> = x.split(' ').collect();
3306 /// assert_eq!(d, &["", "", "", "", "a", "", "b", "c"]);
3309 /// It does _not_ give you:
3312 /// assert_eq!(d, &["a", "b", "c"]);
3315 /// Use [`split_whitespace`] for this behavior.
3317 /// [`split_whitespace`]: str::split_whitespace
3318 #[stable(feature = "rust1", since = "1.0.0")]
3320 pub fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> {
3321 Split(SplitInternal {
// The searcher is created over the whole of `self`.
3324 matcher: pat.into_searcher(self),
// Separator semantics: a trailing empty substring is allowed
// (`split_terminator` overrides this flag to `false`).
3325 allow_trailing_empty: true,
3330 /// An iterator over substrings of this string slice, separated by
3331 /// characters matched by a pattern. Differs from the iterator produced by
3332 /// `split` in that `split_inclusive` leaves the matched part as the
3333 /// terminator of the substring.
3335 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3336 /// function or closure that determines if a character matches.
3338 /// [pattern]: self::pattern
3343 /// #![feature(split_inclusive)]
3344 /// let v: Vec<&str> = "Mary had a little lamb\nlittle lamb\nlittle lamb."
3345 /// .split_inclusive('\n').collect();
3346 /// assert_eq!(v, ["Mary had a little lamb\n", "little lamb\n", "little lamb."]);
3349 /// If the last element of the string is matched,
3350 /// that element will be considered the terminator of the preceding substring.
3351 /// That substring will be the last item returned by the iterator.
3354 /// #![feature(split_inclusive)]
3355 /// let v: Vec<&str> = "Mary had a little lamb\nlittle lamb\nlittle lamb.\n"
3356 /// .split_inclusive('\n').collect();
3357 /// assert_eq!(v, ["Mary had a little lamb\n", "little lamb\n", "little lamb.\n"]);
3359 #[unstable(feature = "split_inclusive", issue = "72360")]
3361 pub fn split_inclusive<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitInclusive<'a, P> {
3362 SplitInclusive(SplitInternal {
// The searcher is created over the whole of `self`.
3365 matcher: pat.into_searcher(self),
// A trailing empty substring is not allowed: when the string ends with a
// match, that match terminates the final item instead.
3366 allow_trailing_empty: false,
3371 /// An iterator over substrings of the given string slice, separated by
3372 /// characters matched by a pattern and yielded in reverse order.
3374 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3375 /// function or closure that determines if a character matches.
3377 /// [pattern]: self::pattern
3379 /// # Iterator behavior
3381 /// The returned iterator requires that the pattern supports a reverse
3382 /// search, and it will be a [`DoubleEndedIterator`] if a forward/reverse
3383 /// search yields the same elements.
3385 /// For iterating from the front, the [`split`] method can be used.
3387 /// [`split`]: str::split
3391 /// Simple patterns:
3394 /// let v: Vec<&str> = "Mary had a little lamb".rsplit(' ').collect();
3395 /// assert_eq!(v, ["lamb", "little", "a", "had", "Mary"]);
3397 /// let v: Vec<&str> = "".rsplit('X').collect();
3398 /// assert_eq!(v, [""]);
3400 /// let v: Vec<&str> = "lionXXtigerXleopard".rsplit('X').collect();
3401 /// assert_eq!(v, ["leopard", "tiger", "", "lion"]);
3403 /// let v: Vec<&str> = "lion::tiger::leopard".rsplit("::").collect();
3404 /// assert_eq!(v, ["leopard", "tiger", "lion"]);
3407 /// A more complex pattern, using a closure:
3410 /// let v: Vec<&str> = "abc1defXghi".rsplit(|c| c == '1' || c == 'X').collect();
3411 /// assert_eq!(v, ["ghi", "def", "abc"]);
3413 #[stable(feature = "rust1", since = "1.0.0")]
3415 pub fn rsplit<'a, P>(&'a self, pat: P) -> RSplit<'a, P>
// Reverse iteration needs a pattern whose searcher can search in reverse.
3417 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
// Reuse the internal state built by `split` (field `.0`) and rewrap it in
// the reverse-order iterator type.
3419 RSplit(self.split(pat).0)
3422 /// An iterator over substrings of the given string slice, separated by
3423 /// characters matched by a pattern.
3425 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3426 /// function or closure that determines if a character matches.
3428 /// [pattern]: self::pattern
3430 /// Equivalent to [`split`], except that the trailing substring
3431 /// is skipped if empty.
3433 /// [`split`]: str::split
3435 /// This method can be used for string data that is _terminated_,
3436 /// rather than _separated_ by a pattern.
3438 /// # Iterator behavior
3440 /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
3441 /// allows a reverse search and forward/reverse search yields the same
3442 /// elements. This is true for, e.g., [`char`], but not for `&str`.
3444 /// If the pattern allows a reverse search but its results might differ
3445 /// from a forward search, the [`rsplit_terminator`] method can be used.
3447 /// [`rsplit_terminator`]: str::rsplit_terminator
3454 /// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
3455 /// assert_eq!(v, ["A", "B"]);
3457 /// let v: Vec<&str> = "A..B..".split_terminator(".").collect();
3458 /// assert_eq!(v, ["A", "", "B", ""]);
3460 #[stable(feature = "rust1", since = "1.0.0")]
3462 pub fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> {
// Same internal state as `split`, with only `allow_trailing_empty` overridden
// via struct-update syntax so that a trailing empty substring is skipped.
3463 SplitTerminator(SplitInternal { allow_trailing_empty: false, ..self.split(pat).0 })
3466 /// An iterator over substrings of `self`, separated by characters
3467 /// matched by a pattern and yielded in reverse order.
3469 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3470 /// function or closure that determines if a character matches.
3472 /// [pattern]: self::pattern
3474 /// Equivalent to [`split`], except that the trailing substring is
3475 /// skipped if empty.
3477 /// [`split`]: str::split
3479 /// This method can be used for string data that is _terminated_,
3480 /// rather than _separated_ by a pattern.
3482 /// # Iterator behavior
3484 /// The returned iterator requires that the pattern supports a
3485 /// reverse search, and it will be double ended if a forward/reverse
3486 /// search yields the same elements.
3488 /// For iterating from the front, the [`split_terminator`] method can be used.
3491 /// [`split_terminator`]: str::split_terminator
3496 /// let v: Vec<&str> = "A.B.".rsplit_terminator('.').collect();
3497 /// assert_eq!(v, ["B", "A"]);
3499 /// let v: Vec<&str> = "A..B..".rsplit_terminator(".").collect();
3500 /// assert_eq!(v, ["", "B", "", "A"]);
3502 #[stable(feature = "rust1", since = "1.0.0")]
3504 pub fn rsplit_terminator<'a, P>(&'a self, pat: P) -> RSplitTerminator<'a, P>
// Reverse iteration needs a pattern whose searcher can search in reverse.
3506 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
// Reuse the internal state built by `split_terminator` (field `.0`) and
// rewrap it in the reverse-order iterator type.
3508 RSplitTerminator(self.split_terminator(pat).0)
3511 /// An iterator over substrings of the given string slice, separated by a
3512 /// pattern, restricted to returning at most `n` items.
3514 /// If `n` substrings are returned, the last substring (the `n`th substring)
3515 /// will contain the remainder of the string.
3517 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3518 /// function or closure that determines if a character matches.
3520 /// [pattern]: self::pattern
3522 /// # Iterator behavior
3524 /// The returned iterator will not be double ended, because it is
3525 /// not efficient to support.
3527 /// If the pattern allows a reverse search, the [`rsplitn`] method can be used.
3530 /// [`rsplitn`]: str::rsplitn
3534 /// Simple patterns:
3537 /// let v: Vec<&str> = "Mary had a little lambda".splitn(3, ' ').collect();
3538 /// assert_eq!(v, ["Mary", "had", "a little lambda"]);
3540 /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(3, "X").collect();
3541 /// assert_eq!(v, ["lion", "", "tigerXleopard"]);
3543 /// let v: Vec<&str> = "abcXdef".splitn(1, 'X').collect();
3544 /// assert_eq!(v, ["abcXdef"]);
3546 /// let v: Vec<&str> = "".splitn(1, 'X').collect();
3547 /// assert_eq!(v, [""]);
3550 /// A more complex pattern, using a closure:
3553 /// let v: Vec<&str> = "abc1defXghi".splitn(2, |c| c == '1' || c == 'X').collect();
3554 /// assert_eq!(v, ["abc", "defXghi"]);
3556 #[stable(feature = "rust1", since = "1.0.0")]
3558 pub fn splitn<'a, P: Pattern<'a>>(&'a self, n: usize, pat: P) -> SplitN<'a, P> {
// Pair the internal state built by `split` with the item budget `n`.
3559 SplitN(SplitNInternal { iter: self.split(pat).0, count: n })
3562 /// An iterator over substrings of this string slice, separated by a
3563 /// pattern, starting from the end of the string, restricted to returning
3564 /// at most `n` items.
3566 /// If `n` substrings are returned, the last substring (the `n`th substring)
3567 /// will contain the remainder of the string.
3569 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3570 /// function or closure that determines if a character matches.
3572 /// [pattern]: self::pattern
3574 /// # Iterator behavior
3576 /// The returned iterator will not be double ended, because it is not
3577 /// efficient to support.
3579 /// For splitting from the front, the [`splitn`] method can be used.
3581 /// [`splitn`]: str::splitn
3585 /// Simple patterns:
3588 /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(3, ' ').collect();
3589 /// assert_eq!(v, ["lamb", "little", "Mary had a"]);
3591 /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(3, 'X').collect();
3592 /// assert_eq!(v, ["leopard", "tiger", "lionX"]);
3594 /// let v: Vec<&str> = "lion::tiger::leopard".rsplitn(2, "::").collect();
3595 /// assert_eq!(v, ["leopard", "lion::tiger"]);
3598 /// A more complex pattern, using a closure:
3601 /// let v: Vec<&str> = "abc1defXghi".rsplitn(2, |c| c == '1' || c == 'X').collect();
3602 /// assert_eq!(v, ["ghi", "abc1def"]);
3604 #[stable(feature = "rust1", since = "1.0.0")]
3606 pub fn rsplitn<'a, P>(&'a self, n: usize, pat: P) -> RSplitN<'a, P>
// Splitting from the end needs a pattern whose searcher can search in reverse.
3608 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
// Reuse the internal state built by `splitn` (field `.0`) and rewrap it in
// the reverse-order iterator type.
3610 RSplitN(self.splitn(n, pat).0)
3613 /// Splits the string on the first occurrence of the specified delimiter and
3614 /// returns prefix before delimiter and suffix after delimiter.
3619 /// #![feature(str_split_once)]
3621 /// assert_eq!("cfg".split_once('='), None);
3622 /// assert_eq!("cfg=foo".split_once('='), Some(("cfg", "foo")));
3623 /// assert_eq!("cfg=foo=bar".split_once('='), Some(("cfg", "foo=bar")));
3625 #[unstable(feature = "str_split_once", reason = "newly added", issue = "74773")]
3627 pub fn split_once<'a, P: Pattern<'a>>(&'a self, delimiter: P) -> Option<(&'a str, &'a str)> {
// `?` returns `None` early when the delimiter never matches.
3628 let (start, end) = delimiter.into_searcher(self).next_match()?;
// `start..end` is the matched delimiter; it is left out of both halves.
3629 Some((&self[..start], &self[end..]))
3632 /// Splits the string on the last occurrence of the specified delimiter and
3633 /// returns prefix before delimiter and suffix after delimiter.
3638 /// #![feature(str_split_once)]
3640 /// assert_eq!("cfg".rsplit_once('='), None);
3641 /// assert_eq!("cfg=foo".rsplit_once('='), Some(("cfg", "foo")));
3642 /// assert_eq!("cfg=foo=bar".rsplit_once('='), Some(("cfg=foo", "bar")));
3644 #[unstable(feature = "str_split_once", reason = "newly added", issue = "74773")]
3646 pub fn rsplit_once<'a, P>(&'a self, delimiter: P) -> Option<(&'a str, &'a str)>
// Finding the last occurrence needs a pattern whose searcher can search in reverse.
3648 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
// `?` returns `None` early when the delimiter never matches.
3650 let (start, end) = delimiter.into_searcher(self).next_match_back()?;
// `start..end` is the matched delimiter; it is left out of both halves.
3651 Some((&self[..start], &self[end..]))
3654 /// An iterator over the disjoint matches of a pattern within the given string
3657 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3658 /// function or closure that determines if a character matches.
3660 /// [pattern]: self::pattern
3662 /// # Iterator behavior
3664 /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
3665 /// allows a reverse search and forward/reverse search yields the same
3666 /// elements. This is true for, e.g., [`char`], but not for `&str`.
3668 /// If the pattern allows a reverse search but its results might differ
3669 /// from a forward search, the [`rmatches`] method can be used.
3671 /// [`rmatches`]: str::matches
3678 /// let v: Vec<&str> = "abcXXXabcYYYabc".matches("abc").collect();
3679 /// assert_eq!(v, ["abc", "abc", "abc"]);
3681 /// let v: Vec<&str> = "1abc2abc3".matches(char::is_numeric).collect();
3682 /// assert_eq!(v, ["1", "2", "3"]);
3684 #[stable(feature = "str_matches", since = "1.2.0")]
3686 pub fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> {
3687 Matches(MatchesInternal(pat.into_searcher(self)))
3690 /// An iterator over the disjoint matches of a pattern within this string slice,
3691 /// yielded in reverse order.
3693 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3694 /// function or closure that determines if a character matches.
3696 /// [pattern]: self::pattern
3698 /// # Iterator behavior
3700 /// The returned iterator requires that the pattern supports a reverse
3701 /// search, and it will be a [`DoubleEndedIterator`] if a forward/reverse
3702 /// search yields the same elements.
3704 /// For iterating from the front, the [`matches`] method can be used.
3706 /// [`matches`]: str::matches
3713 /// let v: Vec<&str> = "abcXXXabcYYYabc".rmatches("abc").collect();
3714 /// assert_eq!(v, ["abc", "abc", "abc"]);
3716 /// let v: Vec<&str> = "1abc2abc3".rmatches(char::is_numeric).collect();
3717 /// assert_eq!(v, ["3", "2", "1"]);
3719 #[stable(feature = "str_matches", since = "1.2.0")]
3721 pub fn rmatches<'a, P>(&'a self, pat: P) -> RMatches<'a, P>
3723 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
3725 RMatches(self.matches(pat).0)
3728 /// An iterator over the disjoint matches of a pattern within this string
3729 /// slice as well as the index that the match starts at.
3731 /// For matches of `pat` within `self` that overlap, only the indices
3732 /// corresponding to the first match are returned.
3734 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3735 /// function or closure that determines if a character matches.
3737 /// [pattern]: self::pattern
3739 /// # Iterator behavior
3741 /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
3742 /// allows a reverse search and forward/reverse search yields the same
3743 /// elements. This is true for, e.g., [`char`], but not for `&str`.
3745 /// If the pattern allows a reverse search but its results might differ
3746 /// from a forward search, the [`rmatch_indices`] method can be used.
3748 /// [`rmatch_indices`]: str::match_indices
3755 /// let v: Vec<_> = "abcXXXabcYYYabc".match_indices("abc").collect();
3756 /// assert_eq!(v, [(0, "abc"), (6, "abc"), (12, "abc")]);
3758 /// let v: Vec<_> = "1abcabc2".match_indices("abc").collect();
3759 /// assert_eq!(v, [(1, "abc"), (4, "abc")]);
3761 /// let v: Vec<_> = "ababa".match_indices("aba").collect();
3762 /// assert_eq!(v, [(0, "aba")]); // only the first `aba`
3764 #[stable(feature = "str_match_indices", since = "1.5.0")]
3766 pub fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> {
3767 MatchIndices(MatchIndicesInternal(pat.into_searcher(self)))
3770 /// An iterator over the disjoint matches of a pattern within `self`,
3771 /// yielded in reverse order along with the index of the match.
3773 /// For matches of `pat` within `self` that overlap, only the indices
3774 /// corresponding to the last match are returned.
3776 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3777 /// function or closure that determines if a character matches.
3779 /// [pattern]: self::pattern
3781 /// # Iterator behavior
3783 /// The returned iterator requires that the pattern supports a reverse
3784 /// search, and it will be a [`DoubleEndedIterator`] if a forward/reverse
3785 /// search yields the same elements.
3787 /// For iterating from the front, the [`match_indices`] method can be used.
3789 /// [`match_indices`]: str::match_indices
3796 /// let v: Vec<_> = "abcXXXabcYYYabc".rmatch_indices("abc").collect();
3797 /// assert_eq!(v, [(12, "abc"), (6, "abc"), (0, "abc")]);
3799 /// let v: Vec<_> = "1abcabc2".rmatch_indices("abc").collect();
3800 /// assert_eq!(v, [(4, "abc"), (1, "abc")]);
3802 /// let v: Vec<_> = "ababa".rmatch_indices("aba").collect();
3803 /// assert_eq!(v, [(2, "aba")]); // only the last `aba`
3805 #[stable(feature = "str_match_indices", since = "1.5.0")]
3807 pub fn rmatch_indices<'a, P>(&'a self, pat: P) -> RMatchIndices<'a, P>
3809 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
3811 RMatchIndices(self.match_indices(pat).0)
3814 /// Returns a string slice with leading and trailing whitespace removed.
3816 /// 'Whitespace' is defined according to the terms of the Unicode Derived
3817 /// Core Property `White_Space`.
3824 /// let s = " Hello\tworld\t";
3826 /// assert_eq!("Hello\tworld", s.trim());
3828 #[must_use = "this returns the trimmed string as a slice, \
3829 without modifying the original"]
3830 #[stable(feature = "rust1", since = "1.0.0")]
3831 pub fn trim(&self) -> &str {
3832 self.trim_matches(|c: char| c.is_whitespace())
3835 /// Returns a string slice with leading whitespace removed.
3837 /// 'Whitespace' is defined according to the terms of the Unicode Derived
3838 /// Core Property `White_Space`.
3840 /// # Text directionality
3842 /// A string is a sequence of bytes. `start` in this context means the first
3843 /// position of that byte string; for a left-to-right language like English or
3844 /// Russian, this will be left side, and for right-to-left languages like
3845 /// Arabic or Hebrew, this will be the right side.
3852 /// let s = " Hello\tworld\t";
3853 /// assert_eq!("Hello\tworld\t", s.trim_start());
3859 /// let s = " English ";
3860 /// assert!(Some('E') == s.trim_start().chars().next());
3862 /// let s = " עברית ";
3863 /// assert!(Some('ע') == s.trim_start().chars().next());
3865 #[must_use = "this returns the trimmed string as a new slice, \
3866 without modifying the original"]
3867 #[stable(feature = "trim_direction", since = "1.30.0")]
3868 pub fn trim_start(&self) -> &str {
3869 self.trim_start_matches(|c: char| c.is_whitespace())
3872 /// Returns a string slice with trailing whitespace removed.
3874 /// 'Whitespace' is defined according to the terms of the Unicode Derived
3875 /// Core Property `White_Space`.
3877 /// # Text directionality
3879 /// A string is a sequence of bytes. `end` in this context means the last
3880 /// position of that byte string; for a left-to-right language like English or
3881 /// Russian, this will be right side, and for right-to-left languages like
3882 /// Arabic or Hebrew, this will be the left side.
3889 /// let s = " Hello\tworld\t";
3890 /// assert_eq!(" Hello\tworld", s.trim_end());
3896 /// let s = " English ";
3897 /// assert!(Some('h') == s.trim_end().chars().rev().next());
3899 /// let s = " עברית ";
3900 /// assert!(Some('ת') == s.trim_end().chars().rev().next());
3902 #[must_use = "this returns the trimmed string as a new slice, \
3903 without modifying the original"]
3904 #[stable(feature = "trim_direction", since = "1.30.0")]
3905 pub fn trim_end(&self) -> &str {
3906 self.trim_end_matches(|c: char| c.is_whitespace())
3909 /// Returns a string slice with leading whitespace removed.
3911 /// 'Whitespace' is defined according to the terms of the Unicode Derived
3912 /// Core Property `White_Space`.
3914 /// # Text directionality
3916 /// A string is a sequence of bytes. 'Left' in this context means the first
3917 /// position of that byte string; for a language like Arabic or Hebrew
3918 /// which are 'right to left' rather than 'left to right', this will be
3919 /// the _right_ side, not the left.
3926 /// let s = " Hello\tworld\t";
3928 /// assert_eq!("Hello\tworld\t", s.trim_left());
3934 /// let s = " English";
3935 /// assert!(Some('E') == s.trim_left().chars().next());
3937 /// let s = " עברית";
3938 /// assert!(Some('ע') == s.trim_left().chars().next());
3940 #[stable(feature = "rust1", since = "1.0.0")]
3943 reason = "superseded by `trim_start`",
3944 suggestion = "trim_start"
3946 pub fn trim_left(&self) -> &str {
3950 /// Returns a string slice with trailing whitespace removed.
3952 /// 'Whitespace' is defined according to the terms of the Unicode Derived
3953 /// Core Property `White_Space`.
3955 /// # Text directionality
3957 /// A string is a sequence of bytes. 'Right' in this context means the last
3958 /// position of that byte string; for a language like Arabic or Hebrew
3959 /// which are 'right to left' rather than 'left to right', this will be
3960 /// the _left_ side, not the right.
3967 /// let s = " Hello\tworld\t";
3969 /// assert_eq!(" Hello\tworld", s.trim_right());
3975 /// let s = "English ";
3976 /// assert!(Some('h') == s.trim_right().chars().rev().next());
3978 /// let s = "עברית ";
3979 /// assert!(Some('ת') == s.trim_right().chars().rev().next());
3981 #[stable(feature = "rust1", since = "1.0.0")]
3984 reason = "superseded by `trim_end`",
3985 suggestion = "trim_end"
3987 pub fn trim_right(&self) -> &str {
3991 /// Returns a string slice with all prefixes and suffixes that match a
3992 /// pattern repeatedly removed.
3994 /// The [pattern] can be a [`char`], a slice of [`char`]s, or a function
3995 /// or closure that determines if a character matches.
3997 /// [pattern]: self::pattern
4001 /// Simple patterns:
4004 /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
4005 /// assert_eq!("123foo1bar123".trim_matches(char::is_numeric), "foo1bar");
4007 /// let x: &[_] = &['1', '2'];
4008 /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar");
4011 /// A more complex pattern, using a closure:
4014 /// assert_eq!("1foo1barXX".trim_matches(|c| c == '1' || c == 'X'), "foo1bar");
4016 #[must_use = "this returns the trimmed string as a new slice, \
4017 without modifying the original"]
4018 #[stable(feature = "rust1", since = "1.0.0")]
4019 pub fn trim_matches<'a, P>(&'a self, pat: P) -> &'a str
4021 P: Pattern<'a, Searcher: DoubleEndedSearcher<'a>>,
4025 let mut matcher = pat.into_searcher(self);
4026 if let Some((a, b)) = matcher.next_reject() {
4028 j = b; // Remember earliest known match, correct it below if
4029 // last match is different
4031 if let Some((_, b)) = matcher.next_reject_back() {
4034 // SAFETY: `Searcher` is known to return valid indices.
4035 unsafe { self.get_unchecked(i..j) }
4038 /// Returns a string slice with all prefixes that match a pattern
4039 /// repeatedly removed.
4041 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
4042 /// function or closure that determines if a character matches.
4044 /// [pattern]: self::pattern
4046 /// # Text directionality
4048 /// A string is a sequence of bytes. `start` in this context means the first
4049 /// position of that byte string; for a left-to-right language like English or
4050 /// Russian, this will be left side, and for right-to-left languages like
4051 /// Arabic or Hebrew, this will be the right side.
4058 /// assert_eq!("11foo1bar11".trim_start_matches('1'), "foo1bar11");
4059 /// assert_eq!("123foo1bar123".trim_start_matches(char::is_numeric), "foo1bar123");
4061 /// let x: &[_] = &['1', '2'];
4062 /// assert_eq!("12foo1bar12".trim_start_matches(x), "foo1bar12");
4064 #[must_use = "this returns the trimmed string as a new slice, \
4065 without modifying the original"]
4066 #[stable(feature = "trim_direction", since = "1.30.0")]
4067 pub fn trim_start_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
4068 let mut i = self.len();
4069 let mut matcher = pat.into_searcher(self);
4070 if let Some((a, _)) = matcher.next_reject() {
4073 // SAFETY: `Searcher` is known to return valid indices.
4074 unsafe { self.get_unchecked(i..self.len()) }
4077 /// Returns a string slice with the prefix removed.
4079 /// If the string starts with the pattern `prefix`, `Some` is returned with the substring where
4080 /// the prefix is removed. Unlike `trim_start_matches`, this method removes the prefix exactly
4083 /// If the string does not start with `prefix`, `None` is returned.
4085 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
4086 /// function or closure that determines if a character matches.
4088 /// [pattern]: self::pattern
4093 /// assert_eq!("foo:bar".strip_prefix("foo:"), Some("bar"));
4094 /// assert_eq!("foo:bar".strip_prefix("bar"), None);
4095 /// assert_eq!("foofoo".strip_prefix("foo"), Some("foo"));
4097 #[must_use = "this returns the remaining substring as a new slice, \
4098 without modifying the original"]
4099 #[stable(feature = "str_strip", since = "1.45.0")]
4100 pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a str> {
4101 prefix.strip_prefix_of(self)
4104 /// Returns a string slice with the suffix removed.
4106 /// If the string ends with the pattern `suffix`, `Some` is returned with the substring where
4107 /// the suffix is removed. Unlike `trim_end_matches`, this method removes the suffix exactly
4110 /// If the string does not end with `suffix`, `None` is returned.
4112 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
4113 /// function or closure that determines if a character matches.
4115 /// [pattern]: self::pattern
4120 /// assert_eq!("bar:foo".strip_suffix(":foo"), Some("bar"));
4121 /// assert_eq!("bar:foo".strip_suffix("bar"), None);
4122 /// assert_eq!("foofoo".strip_suffix("foo"), Some("foo"));
4124 #[must_use = "this returns the remaining substring as a new slice, \
4125 without modifying the original"]
4126 #[stable(feature = "str_strip", since = "1.45.0")]
4127 pub fn strip_suffix<'a, P>(&'a self, suffix: P) -> Option<&'a str>
4130 <P as Pattern<'a>>::Searcher: ReverseSearcher<'a>,
4132 suffix.strip_suffix_of(self)
4135 /// Returns a string slice with all suffixes that match a pattern
4136 /// repeatedly removed.
4138 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
4139 /// function or closure that determines if a character matches.
4141 /// [pattern]: self::pattern
4143 /// # Text directionality
4145 /// A string is a sequence of bytes. `end` in this context means the last
4146 /// position of that byte string; for a left-to-right language like English or
4147 /// Russian, this will be right side, and for right-to-left languages like
4148 /// Arabic or Hebrew, this will be the left side.
4152 /// Simple patterns:
4155 /// assert_eq!("11foo1bar11".trim_end_matches('1'), "11foo1bar");
4156 /// assert_eq!("123foo1bar123".trim_end_matches(char::is_numeric), "123foo1bar");
4158 /// let x: &[_] = &['1', '2'];
4159 /// assert_eq!("12foo1bar12".trim_end_matches(x), "12foo1bar");
4162 /// A more complex pattern, using a closure:
4165 /// assert_eq!("1fooX".trim_end_matches(|c| c == '1' || c == 'X'), "1foo");
4167 #[must_use = "this returns the trimmed string as a new slice, \
4168 without modifying the original"]
4169 #[stable(feature = "trim_direction", since = "1.30.0")]
4170 pub fn trim_end_matches<'a, P>(&'a self, pat: P) -> &'a str
4172 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
4175 let mut matcher = pat.into_searcher(self);
4176 if let Some((_, b)) = matcher.next_reject_back() {
4179 // SAFETY: `Searcher` is known to return valid indices.
4180 unsafe { self.get_unchecked(0..j) }
4183 /// Returns a string slice with all prefixes that match a pattern
4184 /// repeatedly removed.
4186 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
4187 /// function or closure that determines if a character matches.
4189 /// [pattern]: self::pattern
4191 /// # Text directionality
4193 /// A string is a sequence of bytes. 'Left' in this context means the first
4194 /// position of that byte string; for a language like Arabic or Hebrew
4195 /// which are 'right to left' rather than 'left to right', this will be
4196 /// the _right_ side, not the left.
4203 /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
4204 /// assert_eq!("123foo1bar123".trim_left_matches(char::is_numeric), "foo1bar123");
4206 /// let x: &[_] = &['1', '2'];
4207 /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12");
4209 #[stable(feature = "rust1", since = "1.0.0")]
4212 reason = "superseded by `trim_start_matches`",
4213 suggestion = "trim_start_matches"
4215 pub fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
4216 self.trim_start_matches(pat)
4219 /// Returns a string slice with all suffixes that match a pattern
4220 /// repeatedly removed.
4222 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
4223 /// function or closure that determines if a character matches.
4225 /// [pattern]: self::pattern
4227 /// # Text directionality
4229 /// A string is a sequence of bytes. 'Right' in this context means the last
4230 /// position of that byte string; for a language like Arabic or Hebrew
4231 /// which are 'right to left' rather than 'left to right', this will be
4232 /// the _left_ side, not the right.
4236 /// Simple patterns:
4239 /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
4240 /// assert_eq!("123foo1bar123".trim_right_matches(char::is_numeric), "123foo1bar");
4242 /// let x: &[_] = &['1', '2'];
4243 /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar");
4246 /// A more complex pattern, using a closure:
4249 /// assert_eq!("1fooX".trim_right_matches(|c| c == '1' || c == 'X'), "1foo");
4251 #[stable(feature = "rust1", since = "1.0.0")]
4254 reason = "superseded by `trim_end_matches`",
4255 suggestion = "trim_end_matches"
4257 pub fn trim_right_matches<'a, P>(&'a self, pat: P) -> &'a str
4259 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
4261 self.trim_end_matches(pat)
4264 /// Parses this string slice into another type.
4266 /// Because `parse` is so general, it can cause problems with type
4267 /// inference. As such, `parse` is one of the few times you'll see
4268 /// the syntax affectionately known as the 'turbofish': `::<>`. This
4269 /// helps the inference algorithm understand specifically which type
4270 /// you're trying to parse into.
4272 /// `parse` can parse any type that implements the [`FromStr`] trait.
4277 /// Will return [`Err`] if it's not possible to parse this string slice into
4278 /// the desired type.
4280 /// [`Err`]: FromStr::Err
4287 /// let four: u32 = "4".parse().unwrap();
4289 /// assert_eq!(4, four);
4292 /// Using the 'turbofish' instead of annotating `four`:
4295 /// let four = "4".parse::<u32>();
4297 /// assert_eq!(Ok(4), four);
4300 /// Failing to parse:
4303 /// let nope = "j".parse::<u32>();
4305 /// assert!(nope.is_err());
4308 #[stable(feature = "rust1", since = "1.0.0")]
4309 pub fn parse<F: FromStr>(&self) -> Result<F, F::Err> {
4310 FromStr::from_str(self)
4313 /// Checks if all characters in this string are within the ASCII range.
4318 /// let ascii = "hello!\n";
4319 /// let non_ascii = "Grüße, Jürgen ❤";
4321 /// assert!(ascii.is_ascii());
4322 /// assert!(!non_ascii.is_ascii());
4324 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
4326 pub fn is_ascii(&self) -> bool {
4327 // We can treat each byte as character here: all multibyte characters
4328 // start with a byte that is not in the ascii range, so we will stop
4330 self.as_bytes().is_ascii()
4333 /// Checks that two strings are an ASCII case-insensitive match.
4335 /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
4336 /// but without allocating and copying temporaries.
4341 /// assert!("Ferris".eq_ignore_ascii_case("FERRIS"));
4342 /// assert!("Ferrös".eq_ignore_ascii_case("FERRöS"));
4343 /// assert!(!"Ferrös".eq_ignore_ascii_case("FERRÖS"));
4345 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
4347 pub fn eq_ignore_ascii_case(&self, other: &str) -> bool {
4348 self.as_bytes().eq_ignore_ascii_case(other.as_bytes())
4351 /// Converts this string to its ASCII upper case equivalent in-place.
4353 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
4354 /// but non-ASCII letters are unchanged.
4356 /// To return a new uppercased value without modifying the existing one, use
4357 /// [`to_ascii_uppercase`].
4359 /// [`to_ascii_uppercase`]: #method.to_ascii_uppercase
4364 /// let mut s = String::from("Grüße, Jürgen ❤");
4366 /// s.make_ascii_uppercase();
4368 /// assert_eq!("GRüßE, JüRGEN ❤", s);
4370 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
4371 pub fn make_ascii_uppercase(&mut self) {
4372 // SAFETY: safe because we transmute two types with the same layout.
4373 let me = unsafe { self.as_bytes_mut() };
4374 me.make_ascii_uppercase()
4377 /// Converts this string to its ASCII lower case equivalent in-place.
4379 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
4380 /// but non-ASCII letters are unchanged.
4382 /// To return a new lowercased value without modifying the existing one, use
4383 /// [`to_ascii_lowercase`].
4385 /// [`to_ascii_lowercase`]: #method.to_ascii_lowercase
4390 /// let mut s = String::from("GRÜßE, JÜRGEN ❤");
4392 /// s.make_ascii_lowercase();
4394 /// assert_eq!("grÜße, jÜrgen ❤", s);
4396 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
4397 pub fn make_ascii_lowercase(&mut self) {
4398 // SAFETY: safe because we transmute two types with the same layout.
4399 let me = unsafe { self.as_bytes_mut() };
4400 me.make_ascii_lowercase()
4403 /// Return an iterator that escapes each char in `self` with [`char::escape_debug`].
4405 /// Note: only extended grapheme codepoints that begin the string will be
4413 /// for c in "❤\n!".escape_debug() {
4414 /// print!("{}", c);
4419 /// Using `println!` directly:
4422 /// println!("{}", "❤\n!".escape_debug());
4426 /// Both are equivalent to:
4429 /// println!("❤\\n!");
4432 /// Using `to_string`:
4435 /// assert_eq!("❤\n!".escape_debug().to_string(), "❤\\n!");
4437 #[stable(feature = "str_escape", since = "1.34.0")]
4438 pub fn escape_debug(&self) -> EscapeDebug<'_> {
4439 let mut chars = self.chars();
4443 .map(|first| first.escape_debug_ext(true))
4446 .chain(chars.flat_map(CharEscapeDebugContinue)),
4450 /// Return an iterator that escapes each char in `self` with [`char::escape_default`].
4457 /// for c in "❤\n!".escape_default() {
4458 /// print!("{}", c);
4463 /// Using `println!` directly:
4466 /// println!("{}", "❤\n!".escape_default());
4470 /// Both are equivalent to:
4473 /// println!("\\u{{2764}}\\n!");
4476 /// Using `to_string`:
4479 /// assert_eq!("❤\n!".escape_default().to_string(), "\\u{2764}\\n!");
4481 #[stable(feature = "str_escape", since = "1.34.0")]
4482 pub fn escape_default(&self) -> EscapeDefault<'_> {
4483 EscapeDefault { inner: self.chars().flat_map(CharEscapeDefault) }
4486 /// Return an iterator that escapes each char in `self` with [`char::escape_unicode`].
4493 /// for c in "❤\n!".escape_unicode() {
4494 /// print!("{}", c);
4499 /// Using `println!` directly:
4502 /// println!("{}", "❤\n!".escape_unicode());
4506 /// Both are equivalent to:
4509 /// println!("\\u{{2764}}\\u{{a}}\\u{{21}}");
4512 /// Using `to_string`:
4515 /// assert_eq!("❤\n!".escape_unicode().to_string(), "\\u{2764}\\u{a}\\u{21}");
4517 #[stable(feature = "str_escape", since = "1.34.0")]
4518 pub fn escape_unicode(&self) -> EscapeUnicode<'_> {
4519 EscapeUnicode { inner: self.chars().flat_map(CharEscapeUnicode) }
4525 struct CharEscapeDebugContinue impl Fn = |c: char| -> char::EscapeDebug {
4526 c.escape_debug_ext(false)
4530 struct CharEscapeUnicode impl Fn = |c: char| -> char::EscapeUnicode {
4534 struct CharEscapeDefault impl Fn = |c: char| -> char::EscapeDefault {
4539 #[stable(feature = "rust1", since = "1.0.0")]
4540 impl AsRef<[u8]> for str {
4542 fn as_ref(&self) -> &[u8] {
4547 #[stable(feature = "rust1", since = "1.0.0")]
4548 impl Default for &str {
4549 /// Creates an empty str
4550 fn default() -> Self {
4555 #[stable(feature = "default_mut_str", since = "1.28.0")]
4556 impl Default for &mut str {
4557 /// Creates an empty mutable str
4558 fn default() -> Self {
4559 // SAFETY: The empty string is valid UTF-8.
4560 unsafe { from_utf8_unchecked_mut(&mut []) }
4564 /// An iterator over the non-whitespace substrings of a string,
4565 /// separated by any amount of whitespace.
4567 /// This struct is created by the [`split_whitespace`] method on [`str`].
4568 /// See its documentation for more.
4570 /// [`split_whitespace`]: str::split_whitespace
4571 #[stable(feature = "split_whitespace", since = "1.1.0")]
4572 #[derive(Clone, Debug)]
4573 pub struct SplitWhitespace<'a> {
4574 inner: Filter<Split<'a, IsWhitespace>, IsNotEmpty>,
4577 /// An iterator over the non-ASCII-whitespace substrings of a string,
4578 /// separated by any amount of ASCII whitespace.
4580 /// This struct is created by the [`split_ascii_whitespace`] method on [`str`].
4581 /// See its documentation for more.
4583 /// [`split_ascii_whitespace`]: str::split_ascii_whitespace
4584 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
4585 #[derive(Clone, Debug)]
4586 pub struct SplitAsciiWhitespace<'a> {
4587 inner: Map<Filter<SliceSplit<'a, u8, IsAsciiWhitespace>, BytesIsNotEmpty>, UnsafeBytesToStr>,
4590 /// An iterator over the substrings of a string,
4591 /// terminated by a substring matching to a predicate function
4592 /// Unlike `Split`, it contains the matched part as a terminator
4593 /// of the subslice.
4595 /// This struct is created by the [`split_inclusive`] method on [`str`].
4596 /// See its documentation for more.
4598 /// [`split_inclusive`]: str::split_inclusive
4599 #[unstable(feature = "split_inclusive", issue = "72360")]
4600 pub struct SplitInclusive<'a, P: Pattern<'a>>(SplitInternal<'a, P>);
4604 struct IsWhitespace impl Fn = |c: char| -> bool {
4609 struct IsAsciiWhitespace impl Fn = |byte: &u8| -> bool {
4610 byte.is_ascii_whitespace()
4614 struct IsNotEmpty impl<'a, 'b> Fn = |s: &'a &'b str| -> bool {
4619 struct BytesIsNotEmpty impl<'a, 'b> Fn = |s: &'a &'b [u8]| -> bool {
4624 struct UnsafeBytesToStr impl<'a> Fn = |bytes: &'a [u8]| -> &'a str {
4626 unsafe { from_utf8_unchecked(bytes) }
4630 #[stable(feature = "split_whitespace", since = "1.1.0")]
4631 impl<'a> Iterator for SplitWhitespace<'a> {
4632 type Item = &'a str;
4635 fn next(&mut self) -> Option<&'a str> {
4640 fn size_hint(&self) -> (usize, Option<usize>) {
4641 self.inner.size_hint()
4645 fn last(mut self) -> Option<&'a str> {
4650 #[stable(feature = "split_whitespace", since = "1.1.0")]
4651 impl<'a> DoubleEndedIterator for SplitWhitespace<'a> {
4653 fn next_back(&mut self) -> Option<&'a str> {
4654 self.inner.next_back()
4658 #[stable(feature = "fused", since = "1.26.0")]
4659 impl FusedIterator for SplitWhitespace<'_> {}
4661 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
4662 impl<'a> Iterator for SplitAsciiWhitespace<'a> {
4663 type Item = &'a str;
4666 fn next(&mut self) -> Option<&'a str> {
4671 fn size_hint(&self) -> (usize, Option<usize>) {
4672 self.inner.size_hint()
4676 fn last(mut self) -> Option<&'a str> {
4681 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
4682 impl<'a> DoubleEndedIterator for SplitAsciiWhitespace<'a> {
4684 fn next_back(&mut self) -> Option<&'a str> {
4685 self.inner.next_back()
4689 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
4690 impl FusedIterator for SplitAsciiWhitespace<'_> {}
4692 #[unstable(feature = "split_inclusive", issue = "72360")]
4693 impl<'a, P: Pattern<'a>> Iterator for SplitInclusive<'a, P> {
4694 type Item = &'a str;
4697 fn next(&mut self) -> Option<&'a str> {
4698 self.0.next_inclusive()
4702 #[unstable(feature = "split_inclusive", issue = "72360")]
4703 impl<'a, P: Pattern<'a, Searcher: fmt::Debug>> fmt::Debug for SplitInclusive<'a, P> {
4704 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4705 f.debug_struct("SplitInclusive").field("0", &self.0).finish()
4709 // FIXME(#26925) Remove in favor of `#[derive(Clone)]`
4710 #[unstable(feature = "split_inclusive", issue = "72360")]
4711 impl<'a, P: Pattern<'a, Searcher: Clone>> Clone for SplitInclusive<'a, P> {
4712 fn clone(&self) -> Self {
4713 SplitInclusive(self.0.clone())
4717 #[unstable(feature = "split_inclusive", issue = "72360")]
4718 impl<'a, P: Pattern<'a, Searcher: ReverseSearcher<'a>>> DoubleEndedIterator
4719 for SplitInclusive<'a, P>
4722 fn next_back(&mut self) -> Option<&'a str> {
4723 self.0.next_back_inclusive()
4727 #[unstable(feature = "split_inclusive", issue = "72360")]
4728 impl<'a, P: Pattern<'a>> FusedIterator for SplitInclusive<'a, P> {}
4730 /// An iterator of [`u16`] over the string encoded as UTF-16.
4732 /// This struct is created by the [`encode_utf16`] method on [`str`].
4733 /// See its documentation for more.
4735 /// [`encode_utf16`]: str::encode_utf16
4737 #[stable(feature = "encode_utf16", since = "1.8.0")]
4738 pub struct EncodeUtf16<'a> {
4743 #[stable(feature = "collection_debug", since = "1.17.0")]
4744 impl fmt::Debug for EncodeUtf16<'_> {
4745 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4746 f.pad("EncodeUtf16 { .. }")
4750 #[stable(feature = "encode_utf16", since = "1.8.0")]
4751 impl<'a> Iterator for EncodeUtf16<'a> {
4755 fn next(&mut self) -> Option<u16> {
4756 if self.extra != 0 {
4757 let tmp = self.extra;
4762 let mut buf = [0; 2];
4763 self.chars.next().map(|ch| {
4764 let n = ch.encode_utf16(&mut buf).len();
4766 self.extra = buf[1];
4773 fn size_hint(&self) -> (usize, Option<usize>) {
4774 let (low, high) = self.chars.size_hint();
4775 // every char gets either one u16 or two u16,
4776 // so this iterator is between 1 or 2 times as
4777 // long as the underlying iterator.
4778 (low, high.and_then(|n| n.checked_mul(2)))
4782 #[stable(feature = "fused", since = "1.26.0")]
4783 impl FusedIterator for EncodeUtf16<'_> {}
4785 /// The return type of [`str::escape_debug`].
4786 #[stable(feature = "str_escape", since = "1.34.0")]
4787 #[derive(Clone, Debug)]
4788 pub struct EscapeDebug<'a> {
4790 Flatten<option::IntoIter<char::EscapeDebug>>,
4791 FlatMap<Chars<'a>, char::EscapeDebug, CharEscapeDebugContinue>,
4795 /// The return type of [`str::escape_default`].
4796 #[stable(feature = "str_escape", since = "1.34.0")]
4797 #[derive(Clone, Debug)]
4798 pub struct EscapeDefault<'a> {
4799 inner: FlatMap<Chars<'a>, char::EscapeDefault, CharEscapeDefault>,
4802 /// The return type of [`str::escape_unicode`].
4803 #[stable(feature = "str_escape", since = "1.34.0")]
4804 #[derive(Clone, Debug)]
4805 pub struct EscapeUnicode<'a> {
4806 inner: FlatMap<Chars<'a>, char::EscapeUnicode, CharEscapeUnicode>,
4809 macro_rules! escape_types_impls {
4810 ($( $Name: ident ),+) => {$(
4811 #[stable(feature = "str_escape", since = "1.34.0")]
4812 impl<'a> fmt::Display for $Name<'a> {
4813 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4814 self.clone().try_for_each(|c| f.write_char(c))
4818 #[stable(feature = "str_escape", since = "1.34.0")]
4819 impl<'a> Iterator for $Name<'a> {
4823 fn next(&mut self) -> Option<char> { self.inner.next() }
4826 fn size_hint(&self) -> (usize, Option<usize>) { self.inner.size_hint() }
4829 fn try_fold<Acc, Fold, R>(&mut self, init: Acc, fold: Fold) -> R where
4830 Self: Sized, Fold: FnMut(Acc, Self::Item) -> R, R: Try<Ok=Acc>
4832 self.inner.try_fold(init, fold)
4836 fn fold<Acc, Fold>(self, init: Acc, fold: Fold) -> Acc
4837 where Fold: FnMut(Acc, Self::Item) -> Acc,
4839 self.inner.fold(init, fold)
4843 #[stable(feature = "str_escape", since = "1.34.0")]
4844 impl<'a> FusedIterator for $Name<'a> {}
4848 escape_types_impls!(EscapeDebug, EscapeDefault, EscapeUnicode);