1 // ignore-tidy-filelength
3 //! String manipulation.
5 //! For more details, see the [`std::str`] module.
9 #![stable(feature = "rust1", since = "1.0.0")]
11 use self::pattern::Pattern;
12 use self::pattern::{DoubleEndedSearcher, ReverseSearcher, Searcher};
15 use crate::fmt::{self, Write};
16 use crate::iter::{Chain, FlatMap, Flatten};
17 use crate::iter::{Copied, Filter, FusedIterator, Map, TrustedLen, TrustedRandomAccess};
21 use crate::slice::{self, SliceIndex, Split as SliceSplit};
25 #[unstable(feature = "str_internals", issue = "none")]
26 #[allow(missing_docs)]
29 /// Parse a value from a string
31 /// `FromStr`'s [`from_str`] method is often used implicitly, through
32 /// [`str`]'s [`parse`] method. See [`parse`]'s documentation for examples.
34 /// [`from_str`]: FromStr::from_str
35 /// [`parse`]: str::parse
37 /// `FromStr` does not have a lifetime parameter, and so you can only parse types
38 /// that do not contain a lifetime parameter themselves. In other words, you can
39 /// parse an `i32` with `FromStr`, but not a `&i32`. You can parse a struct that
40 /// contains an `i32`, but not one that contains an `&i32`.
44 /// Basic implementation of `FromStr` on an example `Point` type:
47 /// use std::str::FromStr;
48 /// use std::num::ParseIntError;
50 /// #[derive(Debug, PartialEq)]
56 /// impl FromStr for Point {
57 /// type Err = ParseIntError;
59 /// fn from_str(s: &str) -> Result<Self, Self::Err> {
60 /// let coords: Vec<&str> = s.trim_matches(|p| p == '(' || p == ')' )
64 /// let x_fromstr = coords[0].parse::<i32>()?;
65 /// let y_fromstr = coords[1].parse::<i32>()?;
67 /// Ok(Point { x: x_fromstr, y: y_fromstr })
71 /// let p = Point::from_str("(1,2)");
72 /// assert_eq!(p.unwrap(), Point{ x: 1, y: 2} )
// Trait for types that can be built from a `&str`; the only required method
// visible here is `from_str`, which returns `Result<Self, Self::Err>`.
// NOTE(review): this extract shows the doc line and stability attribute of the
// associated error type but not its declaration line.
74 #[stable(feature = "rust1", since = "1.0.0")]
75 pub trait FromStr: Sized {
76 /// The associated error which can be returned from parsing.
77 #[stable(feature = "rust1", since = "1.0.0")]
80 /// Parses a string `s` to return a value of this type.
82 /// If parsing succeeds, returns the value inside [`Ok`]; otherwise,
83 /// when the string is ill-formatted, returns an error inside [`Err`].
84 /// The error type is specific to the implementation of the trait.
86 /// [`Ok`]: ../../std/result/enum.Result.html#variant.Ok
87 /// [`Err`]: ../../std/result/enum.Result.html#variant.Err
91 /// Basic usage with [`i32`][ithirtytwo], a type that implements `FromStr`:
93 /// [ithirtytwo]: ../../std/primitive.i32.html
96 /// use std::str::FromStr;
99 /// let x = i32::from_str(s).unwrap();
101 /// assert_eq!(5, x);
103 #[stable(feature = "rust1", since = "1.0.0")]
104 fn from_str(s: &str) -> Result<Self, Self::Err>;
// `FromStr` for `bool`, with `ParseBoolError` as the error type.
// NOTE(review): only the `"false"` arm and the catch-all error arm are visible
// in this extract; the doc examples below imply a `"true"` arm as well.
107 #[stable(feature = "rust1", since = "1.0.0")]
108 impl FromStr for bool {
109 type Err = ParseBoolError;
111 /// Parse a `bool` from a string.
113 /// Yields a `Result<bool, ParseBoolError>`, because `s` may or may not
114 /// actually be parseable.
119 /// use std::str::FromStr;
121 /// assert_eq!(FromStr::from_str("true"), Ok(true));
122 /// assert_eq!(FromStr::from_str("false"), Ok(false));
123 /// assert!(<bool as FromStr>::from_str("not even a boolean").is_err());
126 /// Note that in many cases the `.parse()` method on `str` is more idiomatic.
129 /// assert_eq!("true".parse(), Ok(true));
130 /// assert_eq!("false".parse(), Ok(false));
131 /// assert!("not even a boolean".parse::<bool>().is_err());
134 fn from_str(s: &str) -> Result<bool, ParseBoolError> {
137 "false" => Ok(false),
// Any input without its own arm is rejected.
138 _ => Err(ParseBoolError { _priv: () }),
143 /// An error returned when parsing a `bool` using [`from_str`] fails.
145 /// [`from_str`]: FromStr::from_str
146 #[derive(Debug, Clone, PartialEq, Eq)]
147 #[stable(feature = "rust1", since = "1.0.0")]
// The `bool` parser in this file builds the value as `ParseBoolError { _priv: () }`;
// the field declaration itself is not visible in this extract.
148 pub struct ParseBoolError {
// Renders the error as one fixed message.
152 #[stable(feature = "rust1", since = "1.0.0")]
153 impl fmt::Display for ParseBoolError {
154 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// Delegates to the string literal's own `fmt`, passing the caller's formatter through.
155 "provided string was not `true` or `false`".fmt(f)
160 Section: Creating a string
163 /// Errors which can occur when attempting to interpret a sequence of [`u8`]
166 /// As such, the `from_utf8` family of functions and methods for both [`String`]s
167 /// and [`&str`]s make use of this error, for example.
169 /// [`String`]: ../../std/string/struct.String.html#method.from_utf8
170 /// [`&str`]: from_utf8
174 /// This error type’s methods can be used to create functionality
175 /// similar to `String::from_utf8_lossy` without allocating heap memory:
178 /// fn from_utf8_lossy<F>(mut input: &[u8], mut push: F) where F: FnMut(&str) {
180 /// match std::str::from_utf8(input) {
186 /// let (valid, after_valid) = input.split_at(error.valid_up_to());
188 /// push(std::str::from_utf8_unchecked(valid))
190 /// push("\u{FFFD}");
192 /// if let Some(invalid_sequence_length) = error.error_len() {
193 /// input = &after_valid[invalid_sequence_length..]
202 #[derive(Copy, Eq, PartialEq, Clone, Debug)]
203 #[stable(feature = "rust1", since = "1.0.0")]
// NOTE(review): the `Display` impl in this file also reads `self.valid_up_to`;
// that field's declaration is not visible in this extract.
204 pub struct Utf8Error {
// Length of the invalid byte sequence when one was found; exposed through
// `error_len()`, which widens it to `usize`.
206 error_len: Option<u8>,
210 /// Returns the index in the given string up to which valid UTF-8 was
213 /// It is the maximum index such that `from_utf8(&input[..index])`
214 /// would return `Ok(_)`.
223 /// // some invalid bytes, in a vector
224 /// let sparkle_heart = vec![0, 159, 146, 150];
226 /// // std::str::from_utf8 returns a Utf8Error
227 /// let error = str::from_utf8(&sparkle_heart).unwrap_err();
229 /// // the second byte is invalid here
230 /// assert_eq!(1, error.valid_up_to());
232 #[stable(feature = "utf8_error", since = "1.5.0")]
// NOTE(review): the body is not visible in this extract; per the docs above it
// reports the index up to which the input was verified as valid UTF-8.
233 pub fn valid_up_to(&self) -> usize {
237 /// Provides more information about the failure:
239 /// * `None`: the end of the input was reached unexpectedly.
240 /// `self.valid_up_to()` is 1 to 3 bytes from the end of the input.
241 /// If a byte stream (such as a file or a network socket) is being decoded incrementally,
242 /// this could be a valid `char` whose UTF-8 byte sequence is spanning multiple chunks.
244 /// * `Some(len)`: an unexpected byte was encountered.
245 /// The length provided is that of the invalid byte sequence
246 /// that starts at the index given by `valid_up_to()`.
247 /// Decoding should resume after that sequence
248 /// (after inserting a [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD]) in case of
251 /// [U+FFFD]: ../../std/char/constant.REPLACEMENT_CHARACTER.html
252 #[stable(feature = "utf8_error_error_len", since = "1.20.0")]
253 pub fn error_len(&self) -> Option<usize> {
// The field is stored as `Option<u8>`; `u8` to `usize` is a widening cast, so no value is lost.
254 self.error_len.map(|len| len as usize)
258 /// Converts a slice of bytes to a string slice.
260 /// A string slice ([`&str`]) is made of bytes ([`u8`]), and a byte slice
261 /// ([`&[u8]`][byteslice]) is made of bytes, so this function converts between
262 /// the two. Not all byte slices are valid string slices, however: [`&str`] requires
263 /// that it is valid UTF-8. `from_utf8()` checks to ensure that the bytes are valid
264 /// UTF-8, and then does the conversion.
267 /// [byteslice]: ../../std/primitive.slice.html
269 /// If you are sure that the byte slice is valid UTF-8, and you don't want to
270 /// incur the overhead of the validity check, there is an unsafe version of
271 /// this function, [`from_utf8_unchecked`][fromutf8u], which has the same
272 /// behavior but skips the check.
274 /// [fromutf8u]: fn.from_utf8_unchecked.html
276 /// If you need a `String` instead of a `&str`, consider
277 /// [`String::from_utf8`][string].
279 /// [string]: ../../std/string/struct.String.html#method.from_utf8
281 /// Because you can stack-allocate a `[u8; N]`, and you can take a
282 /// [`&[u8]`][byteslice] of it, this function is one way to have a
283 /// stack-allocated string. There is an example of this in the
284 /// examples section below.
286 /// [byteslice]: ../../std/primitive.slice.html
290 /// Returns `Err` if the slice is not UTF-8 with a description as to why the
291 /// provided slice is not UTF-8.
300 /// // some bytes, in a vector
301 /// let sparkle_heart = vec![240, 159, 146, 150];
303 /// // We know these bytes are valid, so just use `unwrap()`.
304 /// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
306 /// assert_eq!("💖", sparkle_heart);
314 /// // some invalid bytes, in a vector
315 /// let sparkle_heart = vec![0, 159, 146, 150];
317 /// assert!(str::from_utf8(&sparkle_heart).is_err());
320 /// See the docs for [`Utf8Error`][error] for more details on the kinds of
321 /// errors that can be returned.
323 /// [error]: struct.Utf8Error.html
325 /// A "stack allocated string":
330 /// // some bytes, in a stack-allocated array
331 /// let sparkle_heart = [240, 159, 146, 150];
333 /// // We know these bytes are valid, so just use `unwrap()`.
334 /// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
336 /// assert_eq!("💖", sparkle_heart);
338 #[stable(feature = "rust1", since = "1.0.0")]
339 pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
// Validate first; `?` returns early with the error when validation fails.
340 run_utf8_validation(v)?;
341 // SAFETY: validation just succeeded, which is the precondition of `from_utf8_unchecked`.
342 Ok(unsafe { from_utf8_unchecked(v) })
345 /// Converts a mutable slice of bytes to a mutable string slice.
354 /// // "Hello, Rust!" as a mutable vector
355 /// let mut hellorust = vec![72, 101, 108, 108, 111, 44, 32, 82, 117, 115, 116, 33];
357 /// // As we know these bytes are valid, we can use `unwrap()`
358 /// let outstr = str::from_utf8_mut(&mut hellorust).unwrap();
360 /// assert_eq!("Hello, Rust!", outstr);
368 /// // Some invalid bytes in a mutable vector
369 /// let mut invalid = vec![128, 223];
371 /// assert!(str::from_utf8_mut(&mut invalid).is_err());
373 /// See the docs for [`Utf8Error`][error] for more details on the kinds of
374 /// errors that can be returned.
376 /// [error]: struct.Utf8Error.html
377 #[stable(feature = "str_mut_extras", since = "1.20.0")]
378 pub fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
// Validate first; `?` returns early with the error when validation fails.
379 run_utf8_validation(v)?;
380 // SAFETY: validation just succeeded, which is the precondition of `from_utf8_unchecked_mut`.
381 Ok(unsafe { from_utf8_unchecked_mut(v) })
384 /// Converts a slice of bytes to a string slice without checking
385 /// that the string contains valid UTF-8.
387 /// See the safe version, [`from_utf8`][fromutf8], for more information.
389 /// [fromutf8]: fn.from_utf8.html
393 /// This function is unsafe because it does not check that the bytes passed to
394 /// it are valid UTF-8. If this constraint is violated, undefined behavior
395 /// results, as the rest of Rust assumes that [`&str`]s are valid UTF-8.
406 /// // some bytes, in a vector
407 /// let sparkle_heart = vec![240, 159, 146, 150];
409 /// let sparkle_heart = unsafe {
410 /// str::from_utf8_unchecked(&sparkle_heart)
413 /// assert_eq!("💖", sparkle_heart);
416 #[stable(feature = "rust1", since = "1.0.0")]
pub unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
    // Reinterprets the byte slice as a string slice in place: no copy is made
    // and no validation is performed. The result borrows `v`.
    //
    // SAFETY: the caller must guarantee that the bytes `v`
    // are valid UTF-8, thus the cast to `*const str` is safe.
    // Also, the pointer dereference is safe because that pointer
    // comes from a reference which is guaranteed to be valid for reads.
    unsafe { &*(v as *const [u8] as *const str) }
}
425 /// Converts a slice of bytes to a string slice without checking
426 /// that the string contains valid UTF-8; mutable version.
428 /// See the immutable version, [`from_utf8_unchecked()`] for more information.
437 /// let mut heart = vec![240, 159, 146, 150];
438 /// let heart = unsafe { str::from_utf8_unchecked_mut(&mut heart) };
440 /// assert_eq!("💖", heart);
443 #[stable(feature = "str_mut_extras", since = "1.20.0")]
pub unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str {
    // Reinterprets the mutable byte slice as a mutable string slice in place:
    // no copy is made and no validation is performed. The result borrows `v`.
    //
    // SAFETY: the caller must guarantee that the bytes `v`
    // are valid UTF-8, thus the cast to `*mut str` is safe.
    // Also, the pointer dereference is safe because that pointer
    // comes from a reference which is guaranteed to be valid for writes.
    unsafe { &mut *(v as *mut [u8] as *mut str) }
}
// Two messages, chosen by whether a length for the bad sequence is known.
452 #[stable(feature = "rust1", since = "1.0.0")]
453 impl fmt::Display for Utf8Error {
454 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// Known length: report the sequence length and the index where it starts.
455 if let Some(error_len) = self.error_len {
458 "invalid utf-8 sequence of {} bytes from index {}",
459 error_len, self.valid_up_to
// Presumably the `None` branch (the `else` line is not visible here): only the start index is reported.
462 write!(f, "incomplete utf-8 byte sequence from index {}", self.valid_up_to)
471 /// An iterator over the [`char`]s of a string slice.
474 /// This struct is created by the [`chars`] method on [`str`].
475 /// See its documentation for more.
477 /// [`chars`]: str::chars
479 #[stable(feature = "rust1", since = "1.0.0")]
480 pub struct Chars<'a> {
// Byte iterator over the string's data; the `Iterator` impls in this file decode
// code points from it via `next_code_point` / `next_code_point_reverse`.
481 iter: slice::Iter<'a, u8>,
/// Returns the initial code point accumulator for the first byte of a
/// multi-byte sequence.
///
/// The leading byte carries fewer payload bits the longer the sequence is:
/// the bottom 5 bits for `width` 2, 4 bits for `width` 3, and 3 bits for
/// `width` 4. The mask `0x7F >> width` keeps exactly those bits.
fn utf8_first_byte(byte: u8, width: u32) -> u32 {
    (byte & (0x7F >> width)) as u32
}
492 /// Returns the value of `ch` updated with continuation byte `byte`.
494 fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
// Shift the accumulator left by 6 bits and OR in the bits of `byte` selected by `CONT_MASK`.
495 (ch << 6) | (byte & CONT_MASK) as u32
498 /// Checks whether the byte is a UTF-8 continuation byte (i.e., starts with the
// NOTE(review): the rest of the sentence above is not visible in this extract.
501 fn utf8_is_cont_byte(byte: u8) -> bool {
// Compare the bits outside `CONT_MASK` against the continuation tag `TAG_CONT_U8`.
502 (byte & !CONT_MASK) == TAG_CONT_U8
// NOTE(review): only the signature is visible in this extract. The decoders in
// this file pass it the result of `next()` / `next_back()` on a byte iterator;
// going by the name it presumably yields 0 for `None` — confirm against the body.
506 fn unwrap_or_0(opt: Option<&u8>) -> u8 {
513 /// Reads the next code point out of a byte iterator (assuming a
514 /// UTF-8-like encoding).
515 #[unstable(feature = "str_internals", issue = "none")]
517 pub fn next_code_point<'a, I: Iterator<Item = &'a u8>>(bytes: &mut I) -> Option<u32> {
// An exhausted iterator yields `None` through `?`.
519 let x = *bytes.next()?;
// NOTE(review): the condition guarding this early return, and the conditions
// selecting the 3- and 4-byte steps below, are not visible in this extract.
521 return Some(x as u32);
524 // Multibyte case follows
525 // Decode from a byte combination out of: [[[x y] z] w]
526 // NOTE: Performance is sensitive to the exact formulation here
// Start as if the sequence were two bytes long: lead-byte payload, then `y`.
527 let init = utf8_first_byte(x, 2);
528 let y = unwrap_or_0(bytes.next());
529 let mut ch = utf8_acc_cont_byte(init, y);
532 // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
533 let z = unwrap_or_0(bytes.next());
// `y_z` holds the payload of `y` and `z`; the lead-byte payload goes above those 12 bits.
534 let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
535 ch = init << 12 | y_z;
538 // use only the lower 3 bits of `init`
539 let w = unwrap_or_0(bytes.next());
540 ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
547 /// Reads the last code point out of a byte iterator (assuming a
548 /// UTF-8-like encoding).
550 fn next_code_point_reverse<'a, I>(bytes: &mut I) -> Option<u32>
552 I: DoubleEndedIterator<Item = &'a u8>,
// An exhausted iterator yields `None` through `?`; a last byte below 128 is
// returned directly as the code point.
555 let w = match *bytes.next_back()? {
556 next_byte if next_byte < 128 => return Some(next_byte as u32),
557 back_byte => back_byte,
560 // Multibyte case follows
561 // Decode from a byte combination out of: [x [y [z w]]]
// Walk backwards while the byte just read is itself a continuation byte,
// restarting the accumulator from each newer candidate lead byte.
563 let z = unwrap_or_0(bytes.next_back());
564 ch = utf8_first_byte(z, 2);
565 if utf8_is_cont_byte(z) {
566 let y = unwrap_or_0(bytes.next_back());
567 ch = utf8_first_byte(y, 3);
568 if utf8_is_cont_byte(y) {
569 let x = unwrap_or_0(bytes.next_back());
570 ch = utf8_first_byte(x, 4);
571 ch = utf8_acc_cont_byte(ch, y);
// Fold the later bytes back in, in string order.
573 ch = utf8_acc_cont_byte(ch, z);
575 ch = utf8_acc_cont_byte(ch, w);
// Forward iteration over `char`s, decoding from the wrapped byte iterator.
580 #[stable(feature = "rust1", since = "1.0.0")]
581 impl<'a> Iterator for Chars<'a> {
585 fn next(&mut self) -> Option<char> {
586 next_code_point(&mut self.iter).map(|ch| {
587 // SAFETY: `str` invariant says `ch` is a valid Unicode Scalar Value.
588 unsafe { char::from_u32_unchecked(ch) }
// Counts chars without decoding them.
593 fn count(self) -> usize {
594 // length in `char` is equal to the number of non-continuation bytes
595 let bytes_len = self.iter.len();
596 let mut cont_bytes = 0;
597 for &byte in self.iter {
// `bool as usize` adds 1 for every continuation byte, 0 otherwise.
598 cont_bytes += utf8_is_cont_byte(byte) as usize;
600 bytes_len - cont_bytes
604 fn size_hint(&self) -> (usize, Option<usize>) {
605 let len = self.iter.len();
606 // `(len + 3)` can't overflow, because we know that the `slice::Iter`
607 // belongs to a slice in memory which has a maximum length of
608 // `isize::MAX` (that's well below `usize::MAX`).
// Lower bound: `len / 4` rounded up; upper bound: one char per remaining byte.
609 ((len + 3) / 4, Some(len))
// NOTE(review): the body line of `last` is not visible in this extract.
613 fn last(mut self) -> Option<char> {
614 // No need to go through the entire string.
619 #[stable(feature = "chars_debug_impl", since = "1.38.0")]
620 impl fmt::Debug for Chars<'_> {
621 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// Writes `Chars(` and then the remaining chars as a debug list; the list is
// built from a clone, so `self` is not advanced.
622 write!(f, "Chars(")?;
623 f.debug_list().entries(self.clone()).finish()?;
// Backward iteration: decodes the last code point of the remaining bytes.
629 #[stable(feature = "rust1", since = "1.0.0")]
630 impl<'a> DoubleEndedIterator for Chars<'a> {
632 fn next_back(&mut self) -> Option<char> {
633 next_code_point_reverse(&mut self.iter).map(|ch| {
634 // SAFETY: `str` invariant says `ch` is a valid Unicode Scalar Value.
635 unsafe { char::from_u32_unchecked(ch) }
640 #[stable(feature = "fused", since = "1.26.0")]
641 impl FusedIterator for Chars<'_> {}
644 /// Views the underlying data as a subslice of the original data.
646 /// This has the same lifetime as the original slice, and so the
647 /// iterator can continue to be used while this exists.
652 /// let mut chars = "abc".chars();
654 /// assert_eq!(chars.as_str(), "abc");
656 /// assert_eq!(chars.as_str(), "bc");
659 /// assert_eq!(chars.as_str(), "");
661 #[stable(feature = "iter_to_slice", since = "1.4.0")]
663 pub fn as_str(&self) -> &'a str {
// The string is built from the byte iterator's current slice view, without copying.
664 // SAFETY: `Chars` is only made from a str, which guarantees the iter is valid UTF-8.
665 unsafe { from_utf8_unchecked(self.iter.as_slice()) }
669 /// An iterator over the [`char`]s of a string slice, and their positions.
671 /// This struct is created by the [`char_indices`] method on [`str`].
672 /// See its documentation for more.
674 /// [`char_indices`]: str::char_indices
675 #[derive(Clone, Debug)]
676 #[stable(feature = "rust1", since = "1.0.0")]
// NOTE(review): the field declarations are not visible in this extract. The impls
// in this file use `self.front_offset` (a running offset that `next` advances)
// and `self.iter` (presumably a `Chars`, since it exposes an inner `.iter`).
677 pub struct CharIndices<'a> {
682 #[stable(feature = "rust1", since = "1.0.0")]
683 impl<'a> Iterator for CharIndices<'a> {
684 type Item = (usize, char);
687 fn next(&mut self) -> Option<(usize, char)> {
// Remaining byte count before decoding; compared with the count afterwards
// to learn how many bytes the char occupied.
688 let pre_len = self.iter.iter.len();
689 match self.iter.next() {
// The reported index is the offset before the char was consumed.
692 let index = self.front_offset;
693 let len = self.iter.iter.len();
694 self.front_offset += pre_len - len;
// NOTE(review): the bodies of `count` and `last` are not visible in this extract.
701 fn count(self) -> usize {
706 fn size_hint(&self) -> (usize, Option<usize>) {
707 self.iter.size_hint()
711 fn last(mut self) -> Option<(usize, char)> {
712 // No need to go through the entire string.
717 #[stable(feature = "rust1", since = "1.0.0")]
718 impl<'a> DoubleEndedIterator for CharIndices<'a> {
720 fn next_back(&mut self) -> Option<(usize, char)> {
721 self.iter.next_back().map(|ch| {
// Computed after the char was popped from the back: front offset plus the
// number of bytes still remaining.
722 let index = self.front_offset + self.iter.iter.len();
728 #[stable(feature = "fused", since = "1.26.0")]
729 impl FusedIterator for CharIndices<'_> {}
731 impl<'a> CharIndices<'a> {
732 /// Views the underlying data as a subslice of the original data.
734 /// This has the same lifetime as the original slice, and so the
735 /// iterator can continue to be used while this exists.
736 #[stable(feature = "iter_to_slice", since = "1.4.0")]
// NOTE(review): the body is not visible in this extract.
738 pub fn as_str(&self) -> &'a str {
743 /// An iterator over the bytes of a string slice.
745 /// This struct is created by the [`bytes`] method on [`str`].
746 /// See its documentation for more.
748 /// [`bytes`]: str::bytes
749 #[stable(feature = "rust1", since = "1.0.0")]
750 #[derive(Clone, Debug)]
// Newtype over a `Copied` byte-slice iterator; the trait impls in this file
// forward to field `.0`.
751 pub struct Bytes<'a>(Copied<slice::Iter<'a, u8>>);
// The bodies visible here forward straight to the wrapped iterator (`self.0`).
// NOTE(review): the bodies of `next`, `size_hint`, `count`, `last`, `nth`, `all`
// and `any` are not visible in this extract.
753 #[stable(feature = "rust1", since = "1.0.0")]
754 impl Iterator for Bytes<'_> {
758 fn next(&mut self) -> Option<u8> {
763 fn size_hint(&self) -> (usize, Option<usize>) {
768 fn count(self) -> usize {
773 fn last(self) -> Option<Self::Item> {
778 fn nth(&mut self, n: usize) -> Option<Self::Item> {
783 fn all<F>(&mut self, f: F) -> bool
785 F: FnMut(Self::Item) -> bool,
791 fn any<F>(&mut self, f: F) -> bool
793 F: FnMut(Self::Item) -> bool,
799 fn find<P>(&mut self, predicate: P) -> Option<Self::Item>
801 P: FnMut(&Self::Item) -> bool,
803 self.0.find(predicate)
807 fn position<P>(&mut self, predicate: P) -> Option<usize>
809 P: FnMut(Self::Item) -> bool,
811 self.0.position(predicate)
815 fn rposition<P>(&mut self, predicate: P) -> Option<usize>
817 P: FnMut(Self::Item) -> bool,
819 self.0.rposition(predicate)
// NOTE(review): only the body of `rfind` is visible in this extract; it forwards
// to the wrapped iterator.
823 #[stable(feature = "rust1", since = "1.0.0")]
824 impl DoubleEndedIterator for Bytes<'_> {
826 fn next_back(&mut self) -> Option<u8> {
831 fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
836 fn rfind<P>(&mut self, predicate: P) -> Option<Self::Item>
838 P: FnMut(&Self::Item) -> bool,
840 self.0.rfind(predicate)
// NOTE(review): the bodies of `len` and `is_empty` are not visible in this extract.
844 #[stable(feature = "rust1", since = "1.0.0")]
845 impl ExactSizeIterator for Bytes<'_> {
847 fn len(&self) -> usize {
852 fn is_empty(&self) -> bool {
857 #[stable(feature = "fused", since = "1.26.0")]
858 impl FusedIterator for Bytes<'_> {}
860 #[unstable(feature = "trusted_len", issue = "37572")]
861 unsafe impl TrustedLen for Bytes<'_> {}
864 unsafe impl TrustedRandomAccess for Bytes<'_> {
// Unchecked indexed access, forwarded to the wrapped iterator.
865 unsafe fn get_unchecked(&mut self, i: usize) -> u8 {
866 // SAFETY: the caller must uphold the safety contract
867 // for `TrustedRandomAccess::get_unchecked`.
868 unsafe { self.0.get_unchecked(i) }
// NOTE(review): the returned value is not visible in this extract.
870 fn may_have_side_effect() -> bool {
875 /// This macro generates a `Clone` impl for string pattern API
876 /// wrapper types of the form `X<'a, P>`.
877 macro_rules! derive_pattern_clone {
// `$t` is the wrapper type, `$s` the name bound inside `$e`, and `$e` the
// expression that builds the clone (the invocations in this file pass
// `|s| Type { .. }` / `|s| Type(..)`).
878 (clone $t:ident with |$s:ident| $e:expr) => {
879 impl<'a, P> Clone for $t<'a, P>
// The impl only exists when the pattern's searcher is itself `Clone`.
881 P: Pattern<'a, Searcher: Clone>,
883 fn clone(&self) -> Self {
891 /// This macro generates two public iterator structs
892 /// wrapping a private internal one that makes use of the `Pattern` API.
894 /// For all patterns `P: Pattern<'a>` the following items will be
895 /// generated (generics omitted):
897 /// struct $forward_iterator($internal_iterator);
898 /// struct $reverse_iterator($internal_iterator);
900 /// impl Iterator for $forward_iterator
901 /// { /* internal ends up calling Searcher::next_match() */ }
903 /// impl DoubleEndedIterator for $forward_iterator
904 /// where P::Searcher: DoubleEndedSearcher
905 /// { /* internal ends up calling Searcher::next_match_back() */ }
907 /// impl Iterator for $reverse_iterator
908 /// where P::Searcher: ReverseSearcher
909 /// { /* internal ends up calling Searcher::next_match_back() */ }
911 /// impl DoubleEndedIterator for $reverse_iterator
912 /// where P::Searcher: DoubleEndedSearcher
913 /// { /* internal ends up calling Searcher::next_match() */ }
915 /// The internal one is defined outside the macro, and has almost the same
916 /// semantics as a DoubleEndedIterator by delegating to `pattern::Searcher` and
917 /// `pattern::ReverseSearcher` for both forward and reverse iteration.
919 /// "Almost", because a `Searcher` and a `ReverseSearcher` for a given
920 /// `Pattern` might not return the same elements, so actually implementing
921 /// `DoubleEndedIterator` for it would be incorrect.
922 /// (See the docs in `str::pattern` for more details)
924 /// However, the internal struct still represents a single ended iterator from
925 /// either end, and depending on pattern is also a valid double ended iterator,
926 /// so the two wrapper structs implement `Iterator`
927 /// and `DoubleEndedIterator` depending on the concrete pattern type, leading
928 /// to the complex impls seen above.
929 macro_rules! generate_pattern_iterators {
// Main rule: the inputs are the forward struct (with its attributes), the reverse
// struct (with its attributes), the shared stability attributes, the internal
// iterator with its item type, and the kind of delegation.
933 $(#[$forward_iterator_attribute:meta])*
934 struct $forward_iterator:ident;
938 $(#[$reverse_iterator_attribute:meta])*
939 struct $reverse_iterator:ident;
941 // Stability of all generated items
943 $(#[$common_stability_attribute:meta])*
945 // Internal almost-iterator that is being delegated to
947 $internal_iterator:ident yielding ($iterty:ty);
949 // Kind of delegation - either single ended or double ended
// Forward wrapper: a tuple struct around the internal iterator, plus `Debug`,
// `Iterator` and `Clone` impls.
952 $(#[$forward_iterator_attribute])*
953 $(#[$common_stability_attribute])*
954 pub struct $forward_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);
956 $(#[$common_stability_attribute])*
957 impl<'a, P> fmt::Debug for $forward_iterator<'a, P>
959 P: Pattern<'a, Searcher: fmt::Debug>,
961 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
962 f.debug_tuple(stringify!($forward_iterator))
968 $(#[$common_stability_attribute])*
969 impl<'a, P: Pattern<'a>> Iterator for $forward_iterator<'a, P> {
973 fn next(&mut self) -> Option<$iterty> {
978 $(#[$common_stability_attribute])*
979 impl<'a, P> Clone for $forward_iterator<'a, P>
981 P: Pattern<'a, Searcher: Clone>,
983 fn clone(&self) -> Self {
984 $forward_iterator(self.0.clone())
// Reverse wrapper: same shape, but its `Iterator` impl requires a `ReverseSearcher`.
988 $(#[$reverse_iterator_attribute])*
989 $(#[$common_stability_attribute])*
990 pub struct $reverse_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);
992 $(#[$common_stability_attribute])*
993 impl<'a, P> fmt::Debug for $reverse_iterator<'a, P>
995 P: Pattern<'a, Searcher: fmt::Debug>,
997 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
998 f.debug_tuple(stringify!($reverse_iterator))
1004 $(#[$common_stability_attribute])*
1005 impl<'a, P> Iterator for $reverse_iterator<'a, P>
1007 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
1009 type Item = $iterty;
1012 fn next(&mut self) -> Option<$iterty> {
1017 $(#[$common_stability_attribute])*
1018 impl<'a, P> Clone for $reverse_iterator<'a, P>
1020 P: Pattern<'a, Searcher: Clone>,
1022 fn clone(&self) -> Self {
1023 $reverse_iterator(self.0.clone())
1027 #[stable(feature = "fused", since = "1.26.0")]
1028 impl<'a, P: Pattern<'a>> FusedIterator for $forward_iterator<'a, P> {}
1030 #[stable(feature = "fused", since = "1.26.0")]
1031 impl<'a, P> FusedIterator for $reverse_iterator<'a, P>
1033 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
// The macro re-invokes itself with the remaining tokens so that one of the
// `double ended` / `single ended` rules below handles the delegation kind.
1036 generate_pattern_iterators!($($t)* with $(#[$common_stability_attribute])*,
1038 $reverse_iterator, $iterty);
// Double-ended rule: adds `DoubleEndedIterator` to both wrappers, available only
// for patterns whose searcher is a `DoubleEndedSearcher`.
1041 double ended; with $(#[$common_stability_attribute:meta])*,
1042 $forward_iterator:ident,
1043 $reverse_iterator:ident, $iterty:ty
1045 $(#[$common_stability_attribute])*
1046 impl<'a, P> DoubleEndedIterator for $forward_iterator<'a, P>
1048 P: Pattern<'a, Searcher: DoubleEndedSearcher<'a>>,
1051 fn next_back(&mut self) -> Option<$iterty> {
1056 $(#[$common_stability_attribute])*
1057 impl<'a, P> DoubleEndedIterator for $reverse_iterator<'a, P>
1059 P: Pattern<'a, Searcher: DoubleEndedSearcher<'a>>,
1062 fn next_back(&mut self) -> Option<$iterty> {
// Single-ended rule. NOTE(review): its expansion is not visible in this extract.
1068 single ended; with $(#[$common_stability_attribute:meta])*,
1069 $forward_iterator:ident,
1070 $reverse_iterator:ident, $iterty:ty
// `Clone` for `SplitInternal`: the searcher is cloned explicitly and the
// remaining fields are taken from `*s` by struct-update syntax.
1074 derive_pattern_clone! {
1076 with |s| SplitInternal { matcher: s.matcher.clone(), ..*s }
// Shared state behind the split iterators.
// NOTE(review): the `start`, `end` and `finished` fields used by the impls in
// this file are not visible in this extract.
1079 struct SplitInternal<'a, P: Pattern<'a>> {
1082 matcher: P::Searcher,
1083 allow_trailing_empty: bool,
// Debug output lists every field by name; requires the searcher to be `Debug`.
1087 impl<'a, P> fmt::Debug for SplitInternal<'a, P>
1089 P: Pattern<'a, Searcher: fmt::Debug>,
1091 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1092 f.debug_struct("SplitInternal")
1093 .field("start", &self.start)
1094 .field("end", &self.end)
1095 .field("matcher", &self.matcher)
1096 .field("allow_trailing_empty", &self.allow_trailing_empty)
1097 .field("finished", &self.finished)
// Core stepping logic for the split iterators. `start..end` is the part of the
// haystack not yet yielded; each step asks the searcher for the next match from
// the front or the back and slices the haystack around it.
// NOTE(review): several lines of each method (early exits, updates of
// `start`/`end`, return expressions) are not visible in this extract.
1102 impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
// Yields the final piece at most once: only when not already finished, and only
// if trailing empty pieces are allowed or the remaining range is non-empty.
1104 fn get_end(&mut self) -> Option<&'a str> {
1105 if !self.finished && (self.allow_trailing_empty || self.end - self.start > 0) {
1106 self.finished = true;
1107 // SAFETY: `self.start` and `self.end` always lie on unicode boundaries.
1109 let string = self.matcher.haystack().get_unchecked(self.start..self.end);
1118 fn next(&mut self) -> Option<&'a str> {
1123 let haystack = self.matcher.haystack();
1124 match self.matcher.next_match() {
1125 // SAFETY: `Searcher` guarantees that `a` and `b` lie on unicode boundaries.
1126 Some((a, b)) => unsafe {
// The piece runs from the current start up to the start of the match.
1127 let elt = haystack.get_unchecked(self.start..a);
// No further match: fall back to the trailing piece.
1131 None => self.get_end(),
1136 fn next_inclusive(&mut self) -> Option<&'a str> {
1141 let haystack = self.matcher.haystack();
1142 match self.matcher.next_match() {
1143 // SAFETY: `Searcher` guarantees that `b` lies on unicode boundary,
1144 // and self.start is either the start of the original string,
1145 // or `b` was assigned to it, so it also lies on unicode boundary.
1146 Some((_, b)) => unsafe {
// Inclusive variant: the piece runs up to the end of the match.
1147 let elt = haystack.get_unchecked(self.start..b);
1151 None => self.get_end(),
1156 fn next_back(&mut self) -> Option<&'a str>
1158 P::Searcher: ReverseSearcher<'a>,
// When trailing empties are suppressed, the flag is switched on and one step
// is taken recursively; a non-empty piece from that step is returned as is.
1164 if !self.allow_trailing_empty {
1165 self.allow_trailing_empty = true;
1166 match self.next_back() {
1167 Some(elt) if !elt.is_empty() => return Some(elt),
1176 let haystack = self.matcher.haystack();
1177 match self.matcher.next_match_back() {
1178 // SAFETY: `Searcher` guarantees that `a` and `b` lie on unicode boundaries.
1179 Some((a, b)) => unsafe {
// The piece runs from the end of the match to the current end.
1180 let elt = haystack.get_unchecked(b..self.end);
1184 // SAFETY: `self.start` and `self.end` always lie on unicode boundaries.
1186 self.finished = true;
1187 Some(haystack.get_unchecked(self.start..self.end))
1193 fn next_back_inclusive(&mut self) -> Option<&'a str>
1195 P::Searcher: ReverseSearcher<'a>,
// Same trailing-empty handling as `next_back`, recursing into this method.
1201 if !self.allow_trailing_empty {
1202 self.allow_trailing_empty = true;
1203 match self.next_back_inclusive() {
1204 Some(elt) if !elt.is_empty() => return Some(elt),
1213 let haystack = self.matcher.haystack();
1214 match self.matcher.next_match_back() {
1215 // SAFETY: `Searcher` guarantees that `b` lies on unicode boundary,
1216 // and self.end is either the end of the original string,
1217 // or `b` was assigned to it, so it also lies on unicode boundary.
1218 Some((_, b)) => unsafe {
1219 let elt = haystack.get_unchecked(b..self.end);
1223 // SAFETY: self.start is either the start of the original string,
1224 // or start of a substring that represents the part of the string that hasn't
1225 // been iterated yet. Either way, it is guaranteed to lie on unicode boundary.
1226 // self.end is either the end of the original string,
1227 // or `b` was assigned to it, so it also lies on unicode boundary.
1229 self.finished = true;
1230 Some(haystack.get_unchecked(self.start..self.end))
// Generates a forward/reverse iterator pair over `SplitInternal`, yielding
// `&'a str`, with double-ended delegation.
// NOTE(review): the `struct` name lines are not visible in this extract; the
// docs below tie the pair to `str::split` and `str::rsplit`.
1236 generate_pattern_iterators! {
1238 /// Created with the method [`split`].
1240 /// [`split`]: str::split
1243 /// Created with the method [`rsplit`].
1245 /// [`rsplit`]: str::rsplit
1248 #[stable(feature = "rust1", since = "1.0.0")]
1250 SplitInternal yielding (&'a str);
1251 delegate double ended;
// Generates `SplitTerminator` / `RSplitTerminator` over `SplitInternal`,
// yielding `&'a str`, with double-ended delegation.
1254 generate_pattern_iterators! {
1256 /// Created with the method [`split_terminator`].
1258 /// [`split_terminator`]: str::split_terminator
1259 struct SplitTerminator;
1261 /// Created with the method [`rsplit_terminator`].
1263 /// [`rsplit_terminator`]: str::rsplit_terminator
1264 struct RSplitTerminator;
1266 #[stable(feature = "rust1", since = "1.0.0")]
1268 SplitInternal yielding (&'a str);
1269 delegate double ended;
// `Clone` for `SplitNInternal`: the inner split state is cloned explicitly and
// the remaining fields are taken from `*s` by struct-update syntax.
1272 derive_pattern_clone! {
1273 clone SplitNInternal
1274 with |s| SplitNInternal { iter: s.iter.clone(), ..*s }
// Wraps a `SplitInternal`.
// NOTE(review): the field documented by the last line below is not visible in
// this extract; the `Debug` impl in this file prints it as `count`.
1277 struct SplitNInternal<'a, P: Pattern<'a>> {
1278 iter: SplitInternal<'a, P>,
1279 /// The number of splits remaining
// Debug output lists both fields by name; requires the searcher to be `Debug`.
1283 impl<'a, P> fmt::Debug for SplitNInternal<'a, P>
1285 P: Pattern<'a, Searcher: fmt::Debug>,
1287 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1288 f.debug_struct("SplitNInternal")
1289 .field("iter", &self.iter)
1290 .field("count", &self.count)
// NOTE(review): almost all of both method bodies is missing from this extract;
// the one visible statement shows `next_back` delegating to the inner split
// state in at least one branch.
1295 impl<'a, P: Pattern<'a>> SplitNInternal<'a, P> {
1297 fn next(&mut self) -> Option<&'a str> {
1312 fn next_back(&mut self) -> Option<&'a str>
1314 P::Searcher: ReverseSearcher<'a>,
1324 self.iter.next_back()
// Generates a forward/reverse iterator pair over `SplitNInternal`, yielding
// `&'a str`, with single-ended delegation.
// NOTE(review): the `struct` name lines are not visible in this extract; the
// docs below tie the pair to `str::splitn` and `str::rsplitn`.
1330 generate_pattern_iterators! {
1332 /// Created with the method [`splitn`].
1334 /// [`splitn`]: str::splitn
1337 /// Created with the method [`rsplitn`].
1339 /// [`rsplitn`]: str::rsplitn
1342 #[stable(feature = "rust1", since = "1.0.0")]
1344 SplitNInternal yielding (&'a str);
1345 delegate single ended;
// `Clone` for `MatchIndicesInternal`: clones the wrapped searcher.
1348 derive_pattern_clone! {
1349 clone MatchIndicesInternal
1350 with |s| MatchIndicesInternal(s.0.clone())
// Newtype over the pattern's searcher.
1353 struct MatchIndicesInternal<'a, P: Pattern<'a>>(P::Searcher);
// Debug output is a tuple struct showing the wrapped searcher.
1355 impl<'a, P> fmt::Debug for MatchIndicesInternal<'a, P>
1357 P: Pattern<'a, Searcher: fmt::Debug>,
1359 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1360 f.debug_tuple("MatchIndicesInternal").field(&self.0).finish()
// Each step maps a `(start, end)` pair to `(start, matched text)`, where the
// text is the haystack slice `start..end`.
// NOTE(review): the receiver expressions that the `.map` calls hang off are not
// visible in this extract.
1364 impl<'a, P: Pattern<'a>> MatchIndicesInternal<'a, P> {
1366 fn next(&mut self) -> Option<(usize, &'a str)> {
1369 // SAFETY: `Searcher` guarantees that `start` and `end` lie on unicode boundaries.
1370 .map(|(start, end)| unsafe { (start, self.0.haystack().get_unchecked(start..end)) })
1374 fn next_back(&mut self) -> Option<(usize, &'a str)>
1376 P::Searcher: ReverseSearcher<'a>,
1380 // SAFETY: `Searcher` guarantees that `start` and `end` lie on unicode boundaries.
1381 .map(|(start, end)| unsafe { (start, self.0.haystack().get_unchecked(start..end)) })
1385 generate_pattern_iterators! {
1387 /// Created with the method [`match_indices`].
1389 /// [`match_indices`]: str::match_indices
1390 struct MatchIndices;
1392 /// Created with the method [`rmatch_indices`].
1394 /// [`rmatch_indices`]: str::rmatch_indices
1395 struct RMatchIndices;
1397 #[stable(feature = "str_match_indices", since = "1.5.0")]
1399 MatchIndicesInternal yielding ((usize, &'a str));
1400 delegate double ended;
1403 derive_pattern_clone! {
1404 clone MatchesInternal
1405 with |s| MatchesInternal(s.0.clone())
1408 struct MatchesInternal<'a, P: Pattern<'a>>(P::Searcher);
1410 impl<'a, P> fmt::Debug for MatchesInternal<'a, P>
1412 P: Pattern<'a, Searcher: fmt::Debug>,
1414 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1415 f.debug_tuple("MatchesInternal").field(&self.0).finish()
1419 impl<'a, P: Pattern<'a>> MatchesInternal<'a, P> {
1421 fn next(&mut self) -> Option<&'a str> {
1422 // SAFETY: `Searcher` guarantees that `start` and `end` lie on unicode boundaries.
1423 self.0.next_match().map(|(a, b)| unsafe {
1424 // Indices are known to be on utf8 boundaries
1425 self.0.haystack().get_unchecked(a..b)
1430 fn next_back(&mut self) -> Option<&'a str>
1432 P::Searcher: ReverseSearcher<'a>,
1434 // SAFETY: `Searcher` guarantees that `start` and `end` lie on unicode boundaries.
1435 self.0.next_match_back().map(|(a, b)| unsafe {
1436 // Indices are known to be on utf8 boundaries
1437 self.0.haystack().get_unchecked(a..b)
1442 generate_pattern_iterators! {
1444 /// Created with the method [`matches`].
1446 /// [`matches`]: str::matches
1449 /// Created with the method [`rmatches`].
1451 /// [`rmatches`]: str::rmatches
1454 #[stable(feature = "str_matches", since = "1.2.0")]
1456 MatchesInternal yielding (&'a str);
1457 delegate double ended;
1460 /// An iterator over the lines of a string, as string slices.
1462 /// This struct is created with the [`lines`] method on [`str`].
1463 /// See its documentation for more.
1465 /// [`lines`]: str::lines
1466 #[stable(feature = "rust1", since = "1.0.0")]
1467 #[derive(Clone, Debug)]
1468 pub struct Lines<'a>(Map<SplitTerminator<'a, char>, LinesAnyMap>);
1470 #[stable(feature = "rust1", since = "1.0.0")]
1471 impl<'a> Iterator for Lines<'a> {
1472 type Item = &'a str;
1475 fn next(&mut self) -> Option<&'a str> {
1480 fn size_hint(&self) -> (usize, Option<usize>) {
1485 fn last(mut self) -> Option<&'a str> {
1490 #[stable(feature = "rust1", since = "1.0.0")]
1491 impl<'a> DoubleEndedIterator for Lines<'a> {
1493 fn next_back(&mut self) -> Option<&'a str> {
1498 #[stable(feature = "fused", since = "1.26.0")]
1499 impl FusedIterator for Lines<'_> {}
1501 /// Created with the method [`lines_any`].
1503 /// [`lines_any`]: str::lines_any
1504 #[stable(feature = "rust1", since = "1.0.0")]
1505 #[rustc_deprecated(since = "1.4.0", reason = "use lines()/Lines instead now")]
1506 #[derive(Clone, Debug)]
1507 #[allow(deprecated)]
1508 pub struct LinesAny<'a>(Lines<'a>);
1511 /// A nameable, cloneable fn type
1513 struct LinesAnyMap impl<'a> Fn = |line: &'a str| -> &'a str {
1515 if l > 0 && line.as_bytes()[l - 1] == b'\r' { &line[0 .. l - 1] }
1520 #[stable(feature = "rust1", since = "1.0.0")]
1521 #[allow(deprecated)]
1522 impl<'a> Iterator for LinesAny<'a> {
1523 type Item = &'a str;
1526 fn next(&mut self) -> Option<&'a str> {
1531 fn size_hint(&self) -> (usize, Option<usize>) {
1536 #[stable(feature = "rust1", since = "1.0.0")]
1537 #[allow(deprecated)]
1538 impl<'a> DoubleEndedIterator for LinesAny<'a> {
1540 fn next_back(&mut self) -> Option<&'a str> {
1545 #[stable(feature = "fused", since = "1.26.0")]
1546 #[allow(deprecated)]
1547 impl FusedIterator for LinesAny<'_> {}
1550 Section: UTF-8 validation
/// Word-sized mask with only the top bit (`0x80`) of every byte set.
///
/// Built from `size_of::<usize>()` bytes of `0x80`, so it has the right width
/// on every target without relying on a truncating cast.
const NONASCII_MASK: usize = usize::from_ne_bytes([0x80; mem::size_of::<usize>()]);

/// Returns `true` if any byte in the word `x` is nonascii (>= 128).
///
/// A byte is nonascii exactly when its top bit is set, so a single `&` with
/// `NONASCII_MASK` tests every byte of the word at once.
fn contains_nonascii(x: usize) -> bool {
    (NONASCII_MASK & x) != 0
}
1562 /// Walks through `v` checking that it's a valid UTF-8 sequence,
1563 /// returning `Ok(())` in that case, or, if it is invalid, `Err(err)`.
1565 fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
1569 let usize_bytes = mem::size_of::<usize>();
1570 let ascii_block_size = 2 * usize_bytes;
1571 let blocks_end = if len >= ascii_block_size { len - ascii_block_size + 1 } else { 0 };
1572 let align = v.as_ptr().align_offset(usize_bytes);
1575 let old_offset = index;
1577 ($error_len: expr) => {
1578 return Err(Utf8Error { valid_up_to: old_offset, error_len: $error_len });
1585 // we needed data, but there was none: error!
1593 let first = v[index];
1595 let w = UTF8_CHAR_WIDTH[first as usize];
1596 // 2-byte encoding is for codepoints \u{0080} to \u{07ff}
1597 // first C2 80 last DF BF
1598 // 3-byte encoding is for codepoints \u{0800} to \u{ffff}
1599 // first E0 A0 80 last EF BF BF
1600 // excluding surrogates codepoints \u{d800} to \u{dfff}
1601 // ED A0 80 to ED BF BF
1602 // 4-byte encoding is for codepoints \u{10000} to \u{10ffff}
1603 // first F0 90 80 80 last F4 8F BF BF
1605 // Use the UTF-8 syntax from the RFC
1607 // https://tools.ietf.org/html/rfc3629
1609 // UTF8-2 = %xC2-DF UTF8-tail
1610 // UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
1611 // %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
1612 // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
1613 // %xF4 %x80-8F 2( UTF8-tail )
1616 if next!() & !CONT_MASK != TAG_CONT_U8 {
1621 match (first, next!()) {
1623 | (0xE1..=0xEC, 0x80..=0xBF)
1624 | (0xED, 0x80..=0x9F)
1625 | (0xEE..=0xEF, 0x80..=0xBF) => {}
1628 if next!() & !CONT_MASK != TAG_CONT_U8 {
1633 match (first, next!()) {
1634 (0xF0, 0x90..=0xBF) | (0xF1..=0xF3, 0x80..=0xBF) | (0xF4, 0x80..=0x8F) => {}
1637 if next!() & !CONT_MASK != TAG_CONT_U8 {
1640 if next!() & !CONT_MASK != TAG_CONT_U8 {
1648 // Ascii case, try to skip forward quickly.
1649 // When the pointer is aligned, read 2 words of data per iteration
1650 // until we find a word containing a non-ascii byte.
1651 if align != usize::MAX && align.wrapping_sub(index) % usize_bytes == 0 {
1652 let ptr = v.as_ptr();
1653 while index < blocks_end {
1654 // SAFETY: since `align - index` and `ascii_block_size` are
1655 // multiples of `usize_bytes`, `block = ptr.add(index)` is
1656 // always aligned with a `usize` so it's safe to dereference
1657 // both `block` and `block.offset(1)`.
1659 let block = ptr.add(index) as *const usize;
1660 // break if there is a nonascii byte
1661 let zu = contains_nonascii(*block);
1662 let zv = contains_nonascii(*block.offset(1));
1667 index += ascii_block_size;
1669 // step from the point where the wordwise loop stopped
1670 while index < len && v[index] < 128 {
1682 // https://tools.ietf.org/html/rfc3629
1683 static UTF8_CHAR_WIDTH: [u8; 256] = [
1684 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1686 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1688 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1690 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1692 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1694 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1696 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1698 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xEF
1699 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xFF
1702 /// Given a first byte, determines how many bytes are in this UTF-8 character.
1703 #[unstable(feature = "str_internals", issue = "none")]
1705 pub fn utf8_char_width(b: u8) -> usize {
1706 UTF8_CHAR_WIDTH[b as usize] as usize
1709 /// Mask of the value bits of a continuation byte.
1710 const CONT_MASK: u8 = 0b0011_1111;
1711 /// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte.
1712 const TAG_CONT_U8: u8 = 0b1000_0000;
1715 Section: Trait implementations
1719 use crate::cmp::Ordering;
1722 use crate::slice::SliceIndex;
1724 /// Implements ordering of strings.
1726 /// Strings are ordered lexicographically by their byte values. This orders Unicode code
1727 /// points based on their positions in the code charts. This is not necessarily the same as
1728 /// "alphabetical" order, which varies by language and locale. Sorting strings according to
1729 /// culturally-accepted standards requires locale-specific data that is outside the scope of
1731 #[stable(feature = "rust1", since = "1.0.0")]
1734 fn cmp(&self, other: &str) -> Ordering {
1735 self.as_bytes().cmp(other.as_bytes())
1739 #[stable(feature = "rust1", since = "1.0.0")]
1740 impl PartialEq for str {
1742 fn eq(&self, other: &str) -> bool {
1743 self.as_bytes() == other.as_bytes()
1746 fn ne(&self, other: &str) -> bool {
1751 #[stable(feature = "rust1", since = "1.0.0")]
1754 /// Implements comparison operations on strings.
1756 /// Strings are compared lexicographically by their byte values. This compares Unicode code
1757 /// points based on their positions in the code charts. This is not necessarily the same as
1758 /// "alphabetical" order, which varies by language and locale. Comparing strings according to
1759 /// culturally-accepted standards requires locale-specific data that is outside the scope of
1761 #[stable(feature = "rust1", since = "1.0.0")]
1762 impl PartialOrd for str {
1764 fn partial_cmp(&self, other: &str) -> Option<Ordering> {
1765 Some(self.cmp(other))
1769 #[stable(feature = "rust1", since = "1.0.0")]
1770 impl<I> ops::Index<I> for str
1774 type Output = I::Output;
1777 fn index(&self, index: I) -> &I::Output {
1782 #[stable(feature = "rust1", since = "1.0.0")]
1783 impl<I> ops::IndexMut<I> for str
1788 fn index_mut(&mut self, index: I) -> &mut I::Output {
1789 index.index_mut(self)
/// Panics to report an inclusive range whose `end` is `usize::MAX`.
///
/// The inclusive-range `SliceIndex` impls turn `..=end` into `..end + 1`;
/// when `end` is the maximum `usize` that exclusive bound cannot be
/// represented, so indexing fails here instead of overflowing.
///
/// Marked `#[cold]` and `#[inline(never)]` so this panic-only path is kept
/// out of line and off the hot path of the indexing operations that call it.
///
/// # Panics
///
/// Always.
#[inline(never)]
#[cold]
fn str_index_overflow_fail() -> ! {
    panic!("attempted to index str up to maximum usize");
}
1800 /// Implements substring slicing with syntax `&self[..]` or `&mut self[..]`.
1802 /// Returns a slice of the whole string, i.e., returns `&self` or `&mut
1803 /// self`. Equivalent to `&self[0 .. len]` or `&mut self[0 .. len]`. Unlike
1804 /// other indexing operations, this can never panic.
1806 /// This operation is `O(1)`.
1808 /// Prior to 1.20.0, these indexing operations were still supported by
1809 /// direct implementation of `Index` and `IndexMut`.
1811 /// Equivalent to `&self[0 .. len]` or `&mut self[0 .. len]`.
1812 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
1813 unsafe impl SliceIndex<str> for ops::RangeFull {
1816 fn get(self, slice: &str) -> Option<&Self::Output> {
1820 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
1824 unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output {
1828 unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut Self::Output {
1832 fn index(self, slice: &str) -> &Self::Output {
1836 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
1841 /// Implements substring slicing with syntax `&self[begin .. end]` or `&mut
1842 /// self[begin .. end]`.
1844 /// Returns a slice of the given string from the byte range
1845 /// [`begin`, `end`).
1847 /// This operation is `O(1)`.
1849 /// Prior to 1.20.0, these indexing operations were still supported by
1850 /// direct implementation of `Index` and `IndexMut`.
1854 /// Panics if `begin` or `end` does not point to the starting byte offset of
1855 /// a character (as defined by `is_char_boundary`), if `begin > end`, or if
1861 /// let s = "Löwe 老虎 Léopard";
1862 /// assert_eq!(&s[0 .. 1], "L");
1864 /// assert_eq!(&s[1 .. 9], "öwe 老");
1866 /// // these will panic:
1867 /// // byte 2 lies within `ö`:
1870 /// // byte 8 lies within `老`
1873 /// // byte 100 is outside the string
1874 /// // &s[3 .. 100];
1876 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
1877 unsafe impl SliceIndex<str> for ops::Range<usize> {
1880 fn get(self, slice: &str) -> Option<&Self::Output> {
1881 if self.start <= self.end
1882 && slice.is_char_boundary(self.start)
1883 && slice.is_char_boundary(self.end)
1885 // SAFETY: just checked that `start` and `end` are on a char boundary,
1886 // and we are passing in a safe reference, so the return value will also be one.
1887 // We also checked char boundaries, so this is valid UTF-8.
1888 Some(unsafe { &*self.get_unchecked(slice) })
1894 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
1895 if self.start <= self.end
1896 && slice.is_char_boundary(self.start)
1897 && slice.is_char_boundary(self.end)
1899 // SAFETY: just checked that `start` and `end` are on a char boundary.
1900 // We know the pointer is unique because we got it from `slice`.
1901 Some(unsafe { &mut *self.get_unchecked_mut(slice) })
1907 unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output {
1908 let slice = slice as *const [u8];
1909 // SAFETY: the caller guarantees that `self` is in bounds of `slice`
1910 // which satisfies all the conditions for `add`.
1911 let ptr = unsafe { slice.as_ptr().add(self.start) };
1912 let len = self.end - self.start;
1913 ptr::slice_from_raw_parts(ptr, len) as *const str
1916 unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut Self::Output {
1917 let slice = slice as *mut [u8];
1918 // SAFETY: see comments for `get_unchecked`.
1919 let ptr = unsafe { slice.as_mut_ptr().add(self.start) };
1920 let len = self.end - self.start;
1921 ptr::slice_from_raw_parts_mut(ptr, len) as *mut str
1924 fn index(self, slice: &str) -> &Self::Output {
1925 let (start, end) = (self.start, self.end);
1926 match self.get(slice) {
1928 None => super::slice_error_fail(slice, start, end),
1932 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
1933 // is_char_boundary checks that the index is in [0, .len()]
1934 // cannot reuse `get` as above, because of NLL trouble
1935 if self.start <= self.end
1936 && slice.is_char_boundary(self.start)
1937 && slice.is_char_boundary(self.end)
1939 // SAFETY: just checked that `start` and `end` are on a char boundary,
1940 // and we are passing in a safe reference, so the return value will also be one.
1941 unsafe { &mut *self.get_unchecked_mut(slice) }
1943 super::slice_error_fail(slice, self.start, self.end)
1948 /// Implements substring slicing with syntax `&self[.. end]` or `&mut
1951 /// Returns a slice of the given string from the byte range [`0`, `end`).
1952 /// Equivalent to `&self[0 .. end]` or `&mut self[0 .. end]`.
1954 /// This operation is `O(1)`.
1956 /// Prior to 1.20.0, these indexing operations were still supported by
1957 /// direct implementation of `Index` and `IndexMut`.
1961 /// Panics if `end` does not point to the starting byte offset of a
1962 /// character (as defined by `is_char_boundary`), or if `end > len`.
1963 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
1964 unsafe impl SliceIndex<str> for ops::RangeTo<usize> {
1967 fn get(self, slice: &str) -> Option<&Self::Output> {
1968 if slice.is_char_boundary(self.end) {
1969 // SAFETY: just checked that `end` is on a char boundary,
1970 // and we are passing in a safe reference, so the return value will also be one.
1971 Some(unsafe { &*self.get_unchecked(slice) })
1977 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
1978 if slice.is_char_boundary(self.end) {
1979 // SAFETY: just checked that `end` is on a char boundary,
1980 // and we are passing in a safe reference, so the return value will also be one.
1981 Some(unsafe { &mut *self.get_unchecked_mut(slice) })
1987 unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output {
1988 let slice = slice as *const [u8];
1989 let ptr = slice.as_ptr();
1990 ptr::slice_from_raw_parts(ptr, self.end) as *const str
1993 unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut Self::Output {
1994 let slice = slice as *mut [u8];
1995 let ptr = slice.as_mut_ptr();
1996 ptr::slice_from_raw_parts_mut(ptr, self.end) as *mut str
1999 fn index(self, slice: &str) -> &Self::Output {
2001 match self.get(slice) {
2003 None => super::slice_error_fail(slice, 0, end),
2007 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
2008 if slice.is_char_boundary(self.end) {
2009 // SAFETY: just checked that `end` is on a char boundary,
2010 // and we are passing in a safe reference, so the return value will also be one.
2011 unsafe { &mut *self.get_unchecked_mut(slice) }
2013 super::slice_error_fail(slice, 0, self.end)
2018 /// Implements substring slicing with syntax `&self[begin ..]` or `&mut
2019 /// self[begin ..]`.
2021 /// Returns a slice of the given string from the byte range [`begin`,
2022 /// `len`). Equivalent to `&self[begin .. len]` or `&mut self[begin ..
2025 /// This operation is `O(1)`.
2027 /// Prior to 1.20.0, these indexing operations were still supported by
2028 /// direct implementation of `Index` and `IndexMut`.
2032 /// Panics if `begin` does not point to the starting byte offset of
2033 /// a character (as defined by `is_char_boundary`), or if `begin > len`.
2034 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
2035 unsafe impl SliceIndex<str> for ops::RangeFrom<usize> {
2038 fn get(self, slice: &str) -> Option<&Self::Output> {
2039 if slice.is_char_boundary(self.start) {
2040 // SAFETY: just checked that `start` is on a char boundary,
2041 // and we are passing in a safe reference, so the return value will also be one.
2042 Some(unsafe { &*self.get_unchecked(slice) })
2048 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
2049 if slice.is_char_boundary(self.start) {
2050 // SAFETY: just checked that `start` is on a char boundary,
2051 // and we are passing in a safe reference, so the return value will also be one.
2052 Some(unsafe { &mut *self.get_unchecked_mut(slice) })
2058 unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output {
2059 let slice = slice as *const [u8];
2060 // SAFETY: the caller guarantees that `self` is in bounds of `slice`
2061 // which satisfies all the conditions for `add`.
2062 let ptr = unsafe { slice.as_ptr().add(self.start) };
2063 let len = slice.len() - self.start;
2064 ptr::slice_from_raw_parts(ptr, len) as *const str
2067 unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut Self::Output {
2068 let slice = slice as *mut [u8];
2069 // SAFETY: identical to `get_unchecked`.
2070 let ptr = unsafe { slice.as_mut_ptr().add(self.start) };
2071 let len = slice.len() - self.start;
2072 ptr::slice_from_raw_parts_mut(ptr, len) as *mut str
2075 fn index(self, slice: &str) -> &Self::Output {
2076 let (start, end) = (self.start, slice.len());
2077 match self.get(slice) {
2079 None => super::slice_error_fail(slice, start, end),
2083 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
2084 if slice.is_char_boundary(self.start) {
2085 // SAFETY: just checked that `start` is on a char boundary,
2086 // and we are passing in a safe reference, so the return value will also be one.
2087 unsafe { &mut *self.get_unchecked_mut(slice) }
2089 super::slice_error_fail(slice, self.start, slice.len())
2094 /// Implements substring slicing with syntax `&self[begin ..= end]` or `&mut
2095 /// self[begin ..= end]`.
2097 /// Returns a slice of the given string from the byte range
2098 /// [`begin`, `end`]. Equivalent to `&self[begin .. end + 1]` or `&mut
2099 /// self[begin .. end + 1]`, except if `end` has the maximum value for
2102 /// This operation is `O(1)`.
2106 /// Panics if `begin` does not point to the starting byte offset of
2107 /// a character (as defined by `is_char_boundary`), if `end` does not point
2108 /// to the ending byte offset of a character (`end + 1` is either a starting
2109 /// byte offset or equal to `len`), if `begin > end`, or if `end >= len`.
2110 #[stable(feature = "inclusive_range", since = "1.26.0")]
2111 unsafe impl SliceIndex<str> for ops::RangeInclusive<usize> {
2114 fn get(self, slice: &str) -> Option<&Self::Output> {
2115 if *self.end() == usize::MAX {
2118 (*self.start()..self.end() + 1).get(slice)
2122 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
2123 if *self.end() == usize::MAX {
2126 (*self.start()..self.end() + 1).get_mut(slice)
2130 unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output {
2131 // SAFETY: the caller must uphold the safety contract for `get_unchecked`.
2132 unsafe { (*self.start()..self.end() + 1).get_unchecked(slice) }
2135 unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut Self::Output {
2136 // SAFETY: the caller must uphold the safety contract for `get_unchecked_mut`.
2137 unsafe { (*self.start()..self.end() + 1).get_unchecked_mut(slice) }
2140 fn index(self, slice: &str) -> &Self::Output {
2141 if *self.end() == usize::MAX {
2142 str_index_overflow_fail();
2144 (*self.start()..self.end() + 1).index(slice)
2147 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
2148 if *self.end() == usize::MAX {
2149 str_index_overflow_fail();
2151 (*self.start()..self.end() + 1).index_mut(slice)
2155 /// Implements substring slicing with syntax `&self[..= end]` or `&mut
2158 /// Returns a slice of the given string from the byte range [0, `end`].
2159 /// Equivalent to `&self[0 .. end + 1]`, except if `end` has the maximum
2160 /// value for `usize`.
2162 /// This operation is `O(1)`.
2166 /// Panics if `end` does not point to the ending byte offset of a character
2167 /// (`end + 1` is either a starting byte offset as defined by
2168 /// `is_char_boundary`, or equal to `len`), or if `end >= len`.
2169 #[stable(feature = "inclusive_range", since = "1.26.0")]
2170 unsafe impl SliceIndex<str> for ops::RangeToInclusive<usize> {
2173 fn get(self, slice: &str) -> Option<&Self::Output> {
2174 if self.end == usize::MAX { None } else { (..self.end + 1).get(slice) }
2177 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
2178 if self.end == usize::MAX { None } else { (..self.end + 1).get_mut(slice) }
2181 unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output {
2182 // SAFETY: the caller must uphold the safety contract for `get_unchecked`.
2183 unsafe { (..self.end + 1).get_unchecked(slice) }
2186 unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut Self::Output {
2187 // SAFETY: the caller must uphold the safety contract for `get_unchecked_mut`.
2188 unsafe { (..self.end + 1).get_unchecked_mut(slice) }
2191 fn index(self, slice: &str) -> &Self::Output {
2192 if self.end == usize::MAX {
2193 str_index_overflow_fail();
2195 (..self.end + 1).index(slice)
2198 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
2199 if self.end == usize::MAX {
2200 str_index_overflow_fail();
2202 (..self.end + 1).index_mut(slice)
// Truncates `s` to at most `max` bytes without splitting a character.
// Returns `true` if the string was truncated, together with the
// (possibly shortened) string slice.
fn truncate_to_char_boundary(s: &str, mut max: usize) -> (bool, &str) {
    // Nothing to cut: the whole string already fits.
    if max >= s.len() {
        return (false, s);
    }
    // Step back to the nearest char boundary. Index 0 is always a boundary,
    // so the loop terminates before `max` can underflow.
    while !s.is_char_boundary(max) {
        max -= 1;
    }
    (true, &s[..max])
}
2223 fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
2224 const MAX_DISPLAY_LENGTH: usize = 256;
2225 let (truncated, s_trunc) = truncate_to_char_boundary(s, MAX_DISPLAY_LENGTH);
2226 let ellipsis = if truncated { "[...]" } else { "" };
2229 if begin > s.len() || end > s.len() {
2230 let oob_index = if begin > s.len() { begin } else { end };
2231 panic!("byte index {} is out of bounds of `{}`{}", oob_index, s_trunc, ellipsis);
2237 "begin <= end ({} <= {}) when slicing `{}`{}",
2244 // 3. character boundary
2245 let index = if !s.is_char_boundary(begin) { begin } else { end };
2246 // find the character
2247 let mut char_start = index;
2248 while !s.is_char_boundary(char_start) {
2251 // `char_start` must be less than len and a char boundary
2252 let ch = s[char_start..].chars().next().unwrap();
2253 let char_range = char_start..char_start + ch.len_utf8();
2255 "byte index {} is not a char boundary; it is inside {:?} (bytes {:?}) of `{}`{}",
2256 index, ch, char_range, s_trunc, ellipsis
2263 /// Returns the length of `self`.
2265 /// This length is in bytes, not [`char`]s or graphemes. In other words,
2266 /// it may not be what a human considers the length of the string.
2273 /// let len = "foo".len();
2274 /// assert_eq!(3, len);
2276 /// assert_eq!("ƒoo".len(), 4); // fancy f!
2277 /// assert_eq!("ƒoo".chars().count(), 3);
2279 #[stable(feature = "rust1", since = "1.0.0")]
2280 #[rustc_const_stable(feature = "const_str_len", since = "1.32.0")]
2282 pub const fn len(&self) -> usize {
2283 self.as_bytes().len()
2286 /// Returns `true` if `self` has a length of zero bytes.
2294 /// assert!(s.is_empty());
2296 /// let s = "not empty";
2297 /// assert!(!s.is_empty());
2300 #[stable(feature = "rust1", since = "1.0.0")]
2301 #[rustc_const_stable(feature = "const_str_is_empty", since = "1.32.0")]
2302 pub const fn is_empty(&self) -> bool {
2306 /// Checks that `index`-th byte is the first byte in a UTF-8 code point
2307 /// sequence or the end of the string.
2309 /// The start and end of the string (when `index == self.len()`) are
2310 /// considered to be boundaries.
2312 /// Returns `false` if `index` is greater than `self.len()`.
2317 /// let s = "Löwe 老虎 Léopard";
2318 /// assert!(s.is_char_boundary(0));
2320 /// assert!(s.is_char_boundary(6));
2321 /// assert!(s.is_char_boundary(s.len()));
2323 /// // second byte of `ö`
2324 /// assert!(!s.is_char_boundary(2));
2326 /// // third byte of `老`
2327 /// assert!(!s.is_char_boundary(8));
2329 #[stable(feature = "is_char_boundary", since = "1.9.0")]
2331 pub fn is_char_boundary(&self, index: usize) -> bool {
2332 // 0 and len are always ok.
2333 // Test for 0 explicitly so that it can optimize out the check
2334 // easily and skip reading string data for that case.
2335 if index == 0 || index == self.len() {
2338 match self.as_bytes().get(index) {
2340 // This is bit magic equivalent to: b < 128 || b >= 192
2341 Some(&b) => (b as i8) >= -0x40,
2345 /// Converts a string slice to a byte slice. To convert the byte slice back
2346 /// into a string slice, use the [`from_utf8`] function.
2353 /// let bytes = "bors".as_bytes();
2354 /// assert_eq!(b"bors", bytes);
2356 #[stable(feature = "rust1", since = "1.0.0")]
2357 #[rustc_const_stable(feature = "str_as_bytes", since = "1.32.0")]
2359 #[allow(unused_attributes)]
2360 #[allow_internal_unstable(const_fn_union)]
2361 pub const fn as_bytes(&self) -> &[u8] {
2367 // SAFETY: const sound because we transmute two types with the same layout
2368 unsafe { Slices { str: self }.slice }
2371 /// Converts a mutable string slice to a mutable byte slice.
2375 /// The caller must ensure that the content of the slice is valid UTF-8
2376 /// before the borrow ends and the underlying `str` is used.
2378 /// Use of a `str` whose contents are not valid UTF-8 is undefined behavior.
2385 /// let mut s = String::from("Hello");
2386 /// let bytes = unsafe { s.as_bytes_mut() };
2388 /// assert_eq!(b"Hello", bytes);
2394 /// let mut s = String::from("🗻∈🌏");
2397 /// let bytes = s.as_bytes_mut();
2399 /// bytes[0] = 0xF0;
2400 /// bytes[1] = 0x9F;
2401 /// bytes[2] = 0x8D;
2402 /// bytes[3] = 0x94;
2405 /// assert_eq!("🍔∈🌏", s);
2407 #[stable(feature = "str_mut_extras", since = "1.20.0")]
2409 pub unsafe fn as_bytes_mut(&mut self) -> &mut [u8] {
2410 // SAFETY: the cast from `*mut str` to `*mut [u8]` is sound since `str`
2411 // has the same layout as `[u8]` (only libstd can make this guarantee).
2412 // The pointer dereference is safe since it comes from a mutable reference which
2413 // is guaranteed to be valid for writes.
2414 unsafe { &mut *(self as *mut str as *mut [u8]) }
2417 /// Converts a string slice to a raw pointer.
2419 /// As string slices are a slice of bytes, the raw pointer points to a
2420 /// [`u8`]. This pointer will be pointing to the first byte of the string
2423 /// The caller must ensure that the returned pointer is never written to.
2424 /// If you need to mutate the contents of the string slice, use [`as_mut_ptr`].
2426 /// [`as_mut_ptr`]: str::as_mut_ptr
2433 /// let s = "Hello";
2434 /// let ptr = s.as_ptr();
2436 #[stable(feature = "rust1", since = "1.0.0")]
2437 #[rustc_const_stable(feature = "rustc_str_as_ptr", since = "1.32.0")]
2439 pub const fn as_ptr(&self) -> *const u8 {
2440 self as *const str as *const u8
2443 /// Converts a mutable string slice to a raw pointer.
2445 /// As string slices are a slice of bytes, the raw pointer points to a
2446 /// [`u8`]. This pointer will be pointing to the first byte of the string
2449 /// It is your responsibility to make sure that the string slice only gets
2450 /// modified in a way that it remains valid UTF-8.
2451 #[stable(feature = "str_as_mut_ptr", since = "1.36.0")]
2453 pub fn as_mut_ptr(&mut self) -> *mut u8 {
2454 self as *mut str as *mut u8
2457 /// Returns a subslice of `str`.
2459 /// This is the non-panicking alternative to indexing the `str`. Returns
2460 /// [`None`] whenever equivalent indexing operation would panic.
2465 /// let v = String::from("🗻∈🌏");
2467 /// assert_eq!(Some("🗻"), v.get(0..4));
2469 /// // indices not on UTF-8 sequence boundaries
2470 /// assert!(v.get(1..).is_none());
2471 /// assert!(v.get(..8).is_none());
2473 /// // out of bounds
2474 /// assert!(v.get(..42).is_none());
2476 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
2478 pub fn get<I: SliceIndex<str>>(&self, i: I) -> Option<&I::Output> {
2482 /// Returns a mutable subslice of `str`.
2484 /// This is the non-panicking alternative to indexing the `str`. Returns
2485 /// [`None`] whenever equivalent indexing operation would panic.
2490 /// let mut v = String::from("hello");
2491 /// // correct length
2492 /// assert!(v.get_mut(0..5).is_some());
2493 /// // out of bounds
2494 /// assert!(v.get_mut(..42).is_none());
2495 /// assert_eq!(Some("he"), v.get_mut(0..2).map(|v| &*v));
2497 /// assert_eq!("hello", v);
2499 /// let s = v.get_mut(0..2);
2500 /// let s = s.map(|s| {
2501 /// s.make_ascii_uppercase();
2504 /// assert_eq!(Some("HE"), s);
2506 /// assert_eq!("HEllo", v);
2508 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
2510 pub fn get_mut<I: SliceIndex<str>>(&mut self, i: I) -> Option<&mut I::Output> {
2514 /// Returns an unchecked subslice of `str`.
2516 /// This is the unchecked alternative to indexing the `str`.
2520 /// Callers of this function are responsible that these preconditions are
2523 /// * The starting index must not exceed the ending index;
2524 /// * Indexes must be within bounds of the original slice;
2525 /// * Indexes must lie on UTF-8 sequence boundaries.
2527 /// Failing that, the returned string slice may reference invalid memory or
2528 /// violate the invariants communicated by the `str` type.
2535 /// assert_eq!("🗻", v.get_unchecked(0..4));
2536 /// assert_eq!("∈", v.get_unchecked(4..7));
2537 /// assert_eq!("🌏", v.get_unchecked(7..11));
2540 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
2542 pub unsafe fn get_unchecked<I: SliceIndex<str>>(&self, i: I) -> &I::Output {
2543 // SAFETY: the caller must uphold the safety contract for `get_unchecked`;
2544 // the slice is dereferenceable because `self` is a safe reference.
2545 // The returned pointer is safe because impls of `SliceIndex` have to guarantee that it is.
2546 unsafe { &*i.get_unchecked(self) }
2549 /// Returns a mutable, unchecked subslice of `str`.
///
2551 /// This is the unchecked alternative to indexing the `str`.
///
/// # Safety
///
2555 /// Callers of this function are responsible that these preconditions are
/// satisfied:
///
2558 /// * The starting index must not exceed the ending index;
2559 /// * Indexes must be within bounds of the original slice;
2560 /// * Indexes must lie on UTF-8 sequence boundaries.
///
2562 /// Failing that, the returned string slice may reference invalid memory or
2563 /// violate the invariants communicated by the `str` type.
///
/// # Examples
///
/// ```
2568 /// let mut v = String::from("🗻∈🌏");
/// unsafe {
2570 /// assert_eq!("🗻", v.get_unchecked_mut(0..4));
2571 /// assert_eq!("∈", v.get_unchecked_mut(4..7));
2572 /// assert_eq!("🌏", v.get_unchecked_mut(7..11));
/// }
/// ```
2575 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
2577 pub unsafe fn get_unchecked_mut<I: SliceIndex<str>>(&mut self, i: I) -> &mut I::Output {
2578 // SAFETY: the caller must uphold the safety contract for `get_unchecked_mut`;
2579 // the slice is dereferenceable because `self` is a safe reference.
2580 // The returned pointer is safe because impls of `SliceIndex` have to guarantee that it is.
2581 unsafe { &mut *i.get_unchecked_mut(self) }
2584 /// Creates a string slice from another string slice, bypassing safety
/// checks.
///
2587 /// This is generally not recommended, use with caution! For a safe
2588 /// alternative see [`str`] and [`Index`].
///
2590 /// [`Index`]: crate::ops::Index
///
2592 /// This new slice goes from `begin` to `end`, including `begin` but
2593 /// excluding `end`.
///
2595 /// To get a mutable string slice instead, see the
2596 /// [`slice_mut_unchecked`] method.
///
2598 /// [`slice_mut_unchecked`]: str::slice_mut_unchecked
///
/// # Safety
///
2602 /// Callers of this function are responsible that three preconditions are
/// satisfied:
///
2605 /// * `begin` must not exceed `end`.
2606 /// * `begin` and `end` must be byte positions within the string slice.
2607 /// * `begin` and `end` must lie on UTF-8 sequence boundaries.
///
/// # Examples
///
/// ```
2614 /// let s = "Löwe 老虎 Léopard";
///
/// unsafe {
2617 /// assert_eq!("Löwe 老虎 Léopard", s.slice_unchecked(0, 21));
/// }
///
2620 /// let s = "Hello, world!";
///
/// unsafe {
2623 /// assert_eq!("world", s.slice_unchecked(7, 12));
/// }
/// ```
2626 #[stable(feature = "rust1", since = "1.0.0")]
2627 #[rustc_deprecated(since = "1.29.0", reason = "use `get_unchecked(begin..end)` instead")]
2629 pub unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str {
2630 // SAFETY: the caller must uphold the safety contract for `get_unchecked`;
2631 // the slice is dereferenceable because `self` is a safe reference.
2632 // The returned pointer is safe because impls of `SliceIndex` have to guarantee that it is.
2633 unsafe { &*(begin..end).get_unchecked(self) }
2636 /// Creates a string slice from another string slice, bypassing safety
/// checks.
///
2638 /// This is generally not recommended, use with caution! For a safe
2639 /// alternative see [`str`] and [`IndexMut`].
///
2641 /// [`IndexMut`]: crate::ops::IndexMut
///
2643 /// This new slice goes from `begin` to `end`, including `begin` but
2644 /// excluding `end`.
///
2646 /// To get an immutable string slice instead, see the
2647 /// [`slice_unchecked`] method.
///
2649 /// [`slice_unchecked`]: str::slice_unchecked
///
/// # Safety
///
2653 /// Callers of this function are responsible that three preconditions are
/// satisfied:
///
2656 /// * `begin` must not exceed `end`.
2657 /// * `begin` and `end` must be byte positions within the string slice.
2658 /// * `begin` and `end` must lie on UTF-8 sequence boundaries.
2659 #[stable(feature = "str_slice_mut", since = "1.5.0")]
2660 #[rustc_deprecated(since = "1.29.0", reason = "use `get_unchecked_mut(begin..end)` instead")]
2662 pub unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str {
2663 // SAFETY: the caller must uphold the safety contract for `get_unchecked_mut`;
2664 // the slice is dereferenceable because `self` is a safe reference.
2665 // The returned pointer is safe because impls of `SliceIndex` have to guarantee that it is.
2666 unsafe { &mut *(begin..end).get_unchecked_mut(self) }
2669 /// Divide one string slice into two at an index.
///
2671 /// The argument, `mid`, should be a byte offset from the start of the
2672 /// string. It must also be on the boundary of a UTF-8 code point.
///
2674 /// The two slices returned go from the start of the string slice to `mid`,
2675 /// and from `mid` to the end of the string slice.
///
2677 /// To get mutable string slices instead, see the [`split_at_mut`]
/// method.
///
2680 /// [`split_at_mut`]: str::split_at_mut
///
/// # Panics
///
2684 /// Panics if `mid` is not on a UTF-8 code point boundary, or if it is
2685 /// past the end of the last code point of the string slice.
///
/// # Examples
///
/// ```
2692 /// let s = "Per Martin-Löf";
///
2694 /// let (first, last) = s.split_at(3);
///
2696 /// assert_eq!("Per", first);
2697 /// assert_eq!(" Martin-Löf", last);
/// ```
2700 #[stable(feature = "str_split_at", since = "1.4.0")]
2701 pub fn split_at(&self, mid: usize) -> (&str, &str) {
2702 // is_char_boundary checks that the index is in [0, .len()], so this
// single test also serves as the bounds check.
2703 if self.is_char_boundary(mid) {
2704 // SAFETY: just checked that `mid` is on a char boundary.
2705 unsafe { (self.get_unchecked(0..mid), self.get_unchecked(mid..self.len())) }
2707 slice_error_fail(self, 0, mid)
2711 /// Divide one mutable string slice into two at an index.
///
2713 /// The argument, `mid`, should be a byte offset from the start of the
2714 /// string. It must also be on the boundary of a UTF-8 code point.
///
2716 /// The two slices returned go from the start of the string slice to `mid`,
2717 /// and from `mid` to the end of the string slice.
///
2719 /// To get immutable string slices instead, see the [`split_at`] method.
///
2721 /// [`split_at`]: str::split_at
///
/// # Panics
///
2725 /// Panics if `mid` is not on a UTF-8 code point boundary, or if it is
2726 /// past the end of the last code point of the string slice.
///
/// # Examples
///
/// ```
2733 /// let mut s = "Per Martin-Löf".to_string();
/// {
2735 /// let (first, last) = s.split_at_mut(3);
2736 /// first.make_ascii_uppercase();
2737 /// assert_eq!("PER", first);
2738 /// assert_eq!(" Martin-Löf", last);
/// }
2740 /// assert_eq!("PER Martin-Löf", s);
/// ```
2743 #[stable(feature = "str_split_at", since = "1.4.0")]
2744 pub fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str) {
2745 // is_char_boundary checks that the index is in [0, .len()]
2746 if self.is_char_boundary(mid) {
2747 let len = self.len();
2748 let ptr = self.as_mut_ptr();
2749 // SAFETY: just checked that `mid` is on a char boundary.
// The two slices built below cover the byte ranges `[0, mid)` and
// `[mid, len)`, which do not overlap.
2752 from_utf8_unchecked_mut(slice::from_raw_parts_mut(ptr, mid)),
2753 from_utf8_unchecked_mut(slice::from_raw_parts_mut(ptr.add(mid), len - mid)),
2757 slice_error_fail(self, 0, mid)
2761 /// Returns an iterator over the [`char`]s of a string slice.
///
2763 /// As a string slice consists of valid UTF-8, we can iterate through a
2764 /// string slice by [`char`]. This method returns such an iterator.
///
2766 /// It's important to remember that [`char`] represents a Unicode Scalar
2767 /// Value, and may not match your idea of what a 'character' is. Iteration
2768 /// over grapheme clusters may be what you actually want. This functionality
2769 /// is not provided by Rust's standard library, check crates.io instead.
///
/// # Examples
///
/// ```
2776 /// let word = "goodbye";
///
2778 /// let count = word.chars().count();
2779 /// assert_eq!(7, count);
///
2781 /// let mut chars = word.chars();
///
2783 /// assert_eq!(Some('g'), chars.next());
2784 /// assert_eq!(Some('o'), chars.next());
2785 /// assert_eq!(Some('o'), chars.next());
2786 /// assert_eq!(Some('d'), chars.next());
2787 /// assert_eq!(Some('b'), chars.next());
2788 /// assert_eq!(Some('y'), chars.next());
2789 /// assert_eq!(Some('e'), chars.next());
///
2791 /// assert_eq!(None, chars.next());
/// ```
///
2794 /// Remember, [`char`]s may not match your human intuition about characters:
///
/// ```
/// let y = "y̆";
///
2799 /// let mut chars = y.chars();
///
2801 /// assert_eq!(Some('y'), chars.next()); // not 'y̆'
2802 /// assert_eq!(Some('\u{0306}'), chars.next());
///
2804 /// assert_eq!(None, chars.next());
/// ```
2806 #[stable(feature = "rust1", since = "1.0.0")]
2808 pub fn chars(&self) -> Chars<'_> {
2809 Chars { iter: self.as_bytes().iter() }
2812 /// Returns an iterator over the [`char`]s of a string slice, and their
/// positions.
///
2815 /// As a string slice consists of valid UTF-8, we can iterate through a
2816 /// string slice by [`char`]. This method returns an iterator of both
2817 /// these [`char`]s, as well as their byte positions.
///
2819 /// The iterator yields tuples. The position is first, the [`char`] is
/// second.
///
/// # Examples
///
/// ```
2827 /// let word = "goodbye";
///
2829 /// let count = word.char_indices().count();
2830 /// assert_eq!(7, count);
///
2832 /// let mut char_indices = word.char_indices();
///
2834 /// assert_eq!(Some((0, 'g')), char_indices.next());
2835 /// assert_eq!(Some((1, 'o')), char_indices.next());
2836 /// assert_eq!(Some((2, 'o')), char_indices.next());
2837 /// assert_eq!(Some((3, 'd')), char_indices.next());
2838 /// assert_eq!(Some((4, 'b')), char_indices.next());
2839 /// assert_eq!(Some((5, 'y')), char_indices.next());
2840 /// assert_eq!(Some((6, 'e')), char_indices.next());
///
2842 /// assert_eq!(None, char_indices.next());
/// ```
///
2845 /// Remember, [`char`]s may not match your human intuition about characters:
///
/// ```
2848 /// let yes = "y̆es";
///
2850 /// let mut char_indices = yes.char_indices();
///
2852 /// assert_eq!(Some((0, 'y')), char_indices.next()); // not (0, 'y̆')
2853 /// assert_eq!(Some((1, '\u{0306}')), char_indices.next());
///
2855 /// // note the 3 here - the last character took up two bytes
2856 /// assert_eq!(Some((3, 'e')), char_indices.next());
2857 /// assert_eq!(Some((4, 's')), char_indices.next());
///
2859 /// assert_eq!(None, char_indices.next());
/// ```
2861 #[stable(feature = "rust1", since = "1.0.0")]
2863 pub fn char_indices(&self) -> CharIndices<'_> {
2864 CharIndices { front_offset: 0, iter: self.chars() }
2867 /// An iterator over the bytes of a string slice.
///
2869 /// As a string slice consists of a sequence of bytes, we can iterate
2870 /// through a string slice by byte. This method returns such an iterator.
///
/// # Examples
///
/// ```
2877 /// let mut bytes = "bors".bytes();
///
2879 /// assert_eq!(Some(b'b'), bytes.next());
2880 /// assert_eq!(Some(b'o'), bytes.next());
2881 /// assert_eq!(Some(b'r'), bytes.next());
2882 /// assert_eq!(Some(b's'), bytes.next());
///
2884 /// assert_eq!(None, bytes.next());
/// ```
2886 #[stable(feature = "rust1", since = "1.0.0")]
2888 pub fn bytes(&self) -> Bytes<'_> {
2889 Bytes(self.as_bytes().iter().copied())
2892 /// Splits a string slice by whitespace.
///
2894 /// The iterator returned will return string slices that are sub-slices of
2895 /// the original string slice, separated by any amount of whitespace.
///
2897 /// 'Whitespace' is defined according to the terms of the Unicode Derived
2898 /// Core Property `White_Space`. If you only want to split on ASCII whitespace
2899 /// instead, use [`split_ascii_whitespace`].
///
2901 /// [`split_ascii_whitespace`]: str::split_ascii_whitespace
///
/// # Examples
///
/// ```
2908 /// let mut iter = "A few words".split_whitespace();
///
2910 /// assert_eq!(Some("A"), iter.next());
2911 /// assert_eq!(Some("few"), iter.next());
2912 /// assert_eq!(Some("words"), iter.next());
///
2914 /// assert_eq!(None, iter.next());
/// ```
///
2917 /// All kinds of whitespace are considered:
///
/// ```
2920 /// let mut iter = " Mary had\ta\u{2009}little \n\t lamb".split_whitespace();
2921 /// assert_eq!(Some("Mary"), iter.next());
2922 /// assert_eq!(Some("had"), iter.next());
2923 /// assert_eq!(Some("a"), iter.next());
2924 /// assert_eq!(Some("little"), iter.next());
2925 /// assert_eq!(Some("lamb"), iter.next());
///
2927 /// assert_eq!(None, iter.next());
/// ```
2929 #[stable(feature = "split_whitespace", since = "1.1.0")]
2931 pub fn split_whitespace(&self) -> SplitWhitespace<'_> {
2932 SplitWhitespace { inner: self.split(IsWhitespace).filter(IsNotEmpty) }
2935 /// Splits a string slice by ASCII whitespace.
///
2937 /// The iterator returned will return string slices that are sub-slices of
2938 /// the original string slice, separated by any amount of ASCII whitespace.
///
2940 /// To split by Unicode `White_Space` instead, use [`split_whitespace`].
///
2942 /// [`split_whitespace`]: str::split_whitespace
///
/// # Examples
///
/// ```
2949 /// let mut iter = "A few words".split_ascii_whitespace();
///
2951 /// assert_eq!(Some("A"), iter.next());
2952 /// assert_eq!(Some("few"), iter.next());
2953 /// assert_eq!(Some("words"), iter.next());
///
2955 /// assert_eq!(None, iter.next());
/// ```
///
2958 /// All kinds of ASCII whitespace are considered:
///
/// ```
2961 /// let mut iter = " Mary had\ta little \n\t lamb".split_ascii_whitespace();
2962 /// assert_eq!(Some("Mary"), iter.next());
2963 /// assert_eq!(Some("had"), iter.next());
2964 /// assert_eq!(Some("a"), iter.next());
2965 /// assert_eq!(Some("little"), iter.next());
2966 /// assert_eq!(Some("lamb"), iter.next());
///
2968 /// assert_eq!(None, iter.next());
/// ```
2970 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
2972 pub fn split_ascii_whitespace(&self) -> SplitAsciiWhitespace<'_> {
2974 self.as_bytes().split(IsAsciiWhitespace).filter(BytesIsNotEmpty).map(UnsafeBytesToStr);
2975 SplitAsciiWhitespace { inner }
2978 /// An iterator over the lines of a string, as string slices.
///
2980 /// Lines are ended with either a newline (`\n`) or a carriage return with
2981 /// a line feed (`\r\n`).
///
2983 /// The final line ending is optional.
///
/// # Examples
///
/// ```
2990 /// let text = "foo\r\nbar\n\nbaz\n";
2991 /// let mut lines = text.lines();
///
2993 /// assert_eq!(Some("foo"), lines.next());
2994 /// assert_eq!(Some("bar"), lines.next());
2995 /// assert_eq!(Some(""), lines.next());
2996 /// assert_eq!(Some("baz"), lines.next());
///
2998 /// assert_eq!(None, lines.next());
/// ```
///
3001 /// The final line ending isn't required:
///
/// ```
3004 /// let text = "foo\nbar\n\r\nbaz";
3005 /// let mut lines = text.lines();
///
3007 /// assert_eq!(Some("foo"), lines.next());
3008 /// assert_eq!(Some("bar"), lines.next());
3009 /// assert_eq!(Some(""), lines.next());
3010 /// assert_eq!(Some("baz"), lines.next());
///
3012 /// assert_eq!(None, lines.next());
/// ```
3014 #[stable(feature = "rust1", since = "1.0.0")]
3016 pub fn lines(&self) -> Lines<'_> {
3017 Lines(self.split_terminator('\n').map(LinesAnyMap))
3020 /// An iterator over the lines of a string.
///
/// Deprecated: this simply wraps the iterator returned by [`lines`]; call
/// [`lines`] directly instead.
///
/// [`lines`]: str::lines
3021 #[stable(feature = "rust1", since = "1.0.0")]
3022 #[rustc_deprecated(since = "1.4.0", reason = "use lines() instead now")]
3024 #[allow(deprecated)]
3025 pub fn lines_any(&self) -> LinesAny<'_> {
3026 LinesAny(self.lines())
3029 /// Returns an iterator of `u16` over the string encoded as UTF-16.
///
/// # Examples
///
/// ```
3036 /// let text = "Zażółć gęślą jaźń";
///
3038 /// let utf8_len = text.len();
3039 /// let utf16_len = text.encode_utf16().count();
///
3041 /// assert!(utf16_len <= utf8_len);
/// ```
3043 #[stable(feature = "encode_utf16", since = "1.8.0")]
3044 pub fn encode_utf16(&self) -> EncodeUtf16<'_> {
3045 EncodeUtf16 { chars: self.chars(), extra: 0 }
3048 /// Returns `true` if the given pattern matches a sub-slice of
3049 /// this string slice.
///
3051 /// Returns `false` if it does not.
///
3053 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3054 /// function or closure that determines if a character matches.
///
3056 /// [pattern]: self::pattern
///
/// # Examples
///
/// ```
3063 /// let bananas = "bananas";
///
3065 /// assert!(bananas.contains("nana"));
3066 /// assert!(!bananas.contains("apples"));
/// ```
3068 #[stable(feature = "rust1", since = "1.0.0")]
3070 pub fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
3071 pat.is_contained_in(self)
3074 /// Returns `true` if the given pattern matches a prefix of this
/// string slice.
///
3077 /// Returns `false` if it does not.
///
3079 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3080 /// function or closure that determines if a character matches.
///
3082 /// [pattern]: self::pattern
///
/// # Examples
///
/// ```
3089 /// let bananas = "bananas";
///
3091 /// assert!(bananas.starts_with("bana"));
3092 /// assert!(!bananas.starts_with("nana"));
/// ```
3094 #[stable(feature = "rust1", since = "1.0.0")]
3095 pub fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
3096 pat.is_prefix_of(self)
3099 /// Returns `true` if the given pattern matches a suffix of this
/// string slice.
///
3102 /// Returns `false` if it does not.
///
3104 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3105 /// function or closure that determines if a character matches.
///
3107 /// [pattern]: self::pattern
///
/// # Examples
///
/// ```
3114 /// let bananas = "bananas";
///
3116 /// assert!(bananas.ends_with("anas"));
3117 /// assert!(!bananas.ends_with("nana"));
/// ```
3119 #[stable(feature = "rust1", since = "1.0.0")]
3120 pub fn ends_with<'a, P>(&'a self, pat: P) -> bool
3122 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
3124 pat.is_suffix_of(self)
3127 /// Returns the byte index of the first character of this string slice that
3128 /// matches the pattern.
///
3130 /// Returns [`None`] if the pattern doesn't match.
///
3132 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3133 /// function or closure that determines if a character matches.
///
3135 /// [pattern]: self::pattern
///
/// # Examples
///
3139 /// Simple patterns:
///
/// ```
3142 /// let s = "Löwe 老虎 Léopard Gepardi";
///
3144 /// assert_eq!(s.find('L'), Some(0));
3145 /// assert_eq!(s.find('é'), Some(14));
3146 /// assert_eq!(s.find("pard"), Some(17));
/// ```
///
3149 /// More complex patterns using point-free style and closures:
///
/// ```
3152 /// let s = "Löwe 老虎 Léopard";
///
3154 /// assert_eq!(s.find(char::is_whitespace), Some(5));
3155 /// assert_eq!(s.find(char::is_lowercase), Some(1));
3156 /// assert_eq!(s.find(|c: char| c.is_whitespace() || c.is_lowercase()), Some(1));
3157 /// assert_eq!(s.find(|c: char| (c < 'o') && (c > 'a')), Some(4));
/// ```
///
3160 /// Not finding the pattern:
///
/// ```
3163 /// let s = "Löwe 老虎 Léopard";
3164 /// let x: &[_] = &['1', '2'];
///
3166 /// assert_eq!(s.find(x), None);
/// ```
3168 #[stable(feature = "rust1", since = "1.0.0")]
3170 pub fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
3171 pat.into_searcher(self).next_match().map(|(i, _)| i)
3174 /// Returns the byte index for the first character of the rightmost match of the pattern in
3175 /// this string slice.
///
3177 /// Returns [`None`] if the pattern doesn't match.
///
3179 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3180 /// function or closure that determines if a character matches.
///
3182 /// [pattern]: self::pattern
///
/// # Examples
///
3186 /// Simple patterns:
///
/// ```
3189 /// let s = "Löwe 老虎 Léopard Gepardi";
///
3191 /// assert_eq!(s.rfind('L'), Some(13));
3192 /// assert_eq!(s.rfind('é'), Some(14));
3193 /// assert_eq!(s.rfind("pard"), Some(24));
/// ```
///
3196 /// More complex patterns with closures:
///
/// ```
3199 /// let s = "Löwe 老虎 Léopard";
///
3201 /// assert_eq!(s.rfind(char::is_whitespace), Some(12));
3202 /// assert_eq!(s.rfind(char::is_lowercase), Some(20));
/// ```
///
3205 /// Not finding the pattern:
///
/// ```
3208 /// let s = "Löwe 老虎 Léopard";
3209 /// let x: &[_] = &['1', '2'];
///
3211 /// assert_eq!(s.rfind(x), None);
/// ```
3213 #[stable(feature = "rust1", since = "1.0.0")]
3215 pub fn rfind<'a, P>(&'a self, pat: P) -> Option<usize>
3217 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
3219 pat.into_searcher(self).next_match_back().map(|(i, _)| i)
3222 /// An iterator over substrings of this string slice, separated by
3223 /// characters matched by a pattern.
///
3225 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3226 /// function or closure that determines if a character matches.
///
3228 /// [pattern]: self::pattern
///
3230 /// # Iterator behavior
///
3232 /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
3233 /// allows a reverse search and forward/reverse search yields the same
3234 /// elements. This is true for, e.g., [`char`], but not for `&str`.
///
3236 /// If the pattern allows a reverse search but its results might differ
3237 /// from a forward search, the [`rsplit`] method can be used.
///
3239 /// [`rsplit`]: str::rsplit
///
/// # Examples
///
3243 /// Simple patterns:
///
/// ```
3246 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect();
3247 /// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]);
///
3249 /// let v: Vec<&str> = "".split('X').collect();
3250 /// assert_eq!(v, [""]);
///
3252 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
3253 /// assert_eq!(v, ["lion", "", "tiger", "leopard"]);
///
3255 /// let v: Vec<&str> = "lion::tiger::leopard".split("::").collect();
3256 /// assert_eq!(v, ["lion", "tiger", "leopard"]);
///
3258 /// let v: Vec<&str> = "abc1def2ghi".split(char::is_numeric).collect();
3259 /// assert_eq!(v, ["abc", "def", "ghi"]);
///
3261 /// let v: Vec<&str> = "lionXtigerXleopard".split(char::is_uppercase).collect();
3262 /// assert_eq!(v, ["lion", "tiger", "leopard"]);
/// ```
///
3265 /// A more complex pattern, using a closure:
///
/// ```
3268 /// let v: Vec<&str> = "abc1defXghi".split(|c| c == '1' || c == 'X').collect();
3269 /// assert_eq!(v, ["abc", "def", "ghi"]);
/// ```
///
3272 /// If a string contains multiple contiguous separators, you will end up
3273 /// with empty strings in the output:
///
/// ```
3276 /// let x = "||||a||b|c".to_string();
3277 /// let d: Vec<_> = x.split('|').collect();
///
3279 /// assert_eq!(d, &["", "", "", "", "a", "", "b", "c"]);
/// ```
///
3282 /// Contiguous separators are separated by the empty string.
///
/// ```
3285 /// let x = "(///)".to_string();
3286 /// let d: Vec<_> = x.split('/').collect();
///
3288 /// assert_eq!(d, &["(", "", "", ")"]);
/// ```
///
3291 /// Separators at the start or end of a string are neighbored
3292 /// by empty strings.
///
/// ```
3295 /// let d: Vec<_> = "010".split("0").collect();
3296 /// assert_eq!(d, &["", "1", ""]);
/// ```
///
3299 /// When the empty string is used as a separator, it separates
3300 /// every character in the string, along with the beginning
3301 /// and end of the string.
///
/// ```
3304 /// let f: Vec<_> = "rust".split("").collect();
3305 /// assert_eq!(f, &["", "r", "u", "s", "t", ""]);
/// ```
///
3308 /// Contiguous separators can lead to possibly surprising behavior
3309 /// when whitespace is used as the separator. This code is correct:
///
/// ```
3312 /// let x = "    a  b c".to_string();
3313 /// let d: Vec<_> = x.split(' ').collect();
///
3315 /// assert_eq!(d, &["", "", "", "", "a", "", "b", "c"]);
/// ```
///
3318 /// It does _not_ give you:
///
/// ```ignore
3321 /// assert_eq!(d, &["a", "b", "c"]);
/// ```
///
3324 /// Use [`split_whitespace`] for this behavior.
///
3326 /// [`split_whitespace`]: str::split_whitespace
3327 #[stable(feature = "rust1", since = "1.0.0")]
3329 pub fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> {
3330 Split(SplitInternal {
3333 matcher: pat.into_searcher(self),
3334 allow_trailing_empty: true,
3339 /// An iterator over substrings of this string slice, separated by
3340 /// characters matched by a pattern. Differs from the iterator produced by
3341 /// `split` in that `split_inclusive` leaves the matched part as the
3342 /// terminator of the substring.
///
3344 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3345 /// function or closure that determines if a character matches.
///
3347 /// [pattern]: self::pattern
///
/// # Examples
///
/// ```
3352 /// #![feature(split_inclusive)]
3353 /// let v: Vec<&str> = "Mary had a little lamb\nlittle lamb\nlittle lamb."
3354 /// .split_inclusive('\n').collect();
3355 /// assert_eq!(v, ["Mary had a little lamb\n", "little lamb\n", "little lamb."]);
/// ```
///
3358 /// If the last element of the string is matched,
3359 /// that element will be considered the terminator of the preceding substring.
3360 /// That substring will be the last item returned by the iterator.
///
/// ```
3363 /// #![feature(split_inclusive)]
3364 /// let v: Vec<&str> = "Mary had a little lamb\nlittle lamb\nlittle lamb.\n"
3365 /// .split_inclusive('\n').collect();
3366 /// assert_eq!(v, ["Mary had a little lamb\n", "little lamb\n", "little lamb.\n"]);
/// ```
3368 #[unstable(feature = "split_inclusive", issue = "72360")]
3370 pub fn split_inclusive<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitInclusive<'a, P> {
3371 SplitInclusive(SplitInternal {
3374 matcher: pat.into_searcher(self),
3375 allow_trailing_empty: false,
3380 /// An iterator over substrings of the given string slice, separated by
3381 /// characters matched by a pattern and yielded in reverse order.
///
3383 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3384 /// function or closure that determines if a character matches.
///
3386 /// [pattern]: self::pattern
///
3388 /// # Iterator behavior
///
3390 /// The returned iterator requires that the pattern supports a reverse
3391 /// search, and it will be a [`DoubleEndedIterator`] if a forward/reverse
3392 /// search yields the same elements.
///
3394 /// For iterating from the front, the [`split`] method can be used.
///
3396 /// [`split`]: str::split
///
/// # Examples
///
3400 /// Simple patterns:
///
/// ```
3403 /// let v: Vec<&str> = "Mary had a little lamb".rsplit(' ').collect();
3404 /// assert_eq!(v, ["lamb", "little", "a", "had", "Mary"]);
///
3406 /// let v: Vec<&str> = "".rsplit('X').collect();
3407 /// assert_eq!(v, [""]);
///
3409 /// let v: Vec<&str> = "lionXXtigerXleopard".rsplit('X').collect();
3410 /// assert_eq!(v, ["leopard", "tiger", "", "lion"]);
///
3412 /// let v: Vec<&str> = "lion::tiger::leopard".rsplit("::").collect();
3413 /// assert_eq!(v, ["leopard", "tiger", "lion"]);
/// ```
///
3416 /// A more complex pattern, using a closure:
///
/// ```
3419 /// let v: Vec<&str> = "abc1defXghi".rsplit(|c| c == '1' || c == 'X').collect();
3420 /// assert_eq!(v, ["ghi", "def", "abc"]);
/// ```
3422 #[stable(feature = "rust1", since = "1.0.0")]
3424 pub fn rsplit<'a, P>(&'a self, pat: P) -> RSplit<'a, P>
3426 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
3428 RSplit(self.split(pat).0)
3431 /// An iterator over substrings of the given string slice, separated by
3432 /// characters matched by a pattern.
///
3434 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3435 /// function or closure that determines if a character matches.
///
3437 /// [pattern]: self::pattern
///
3439 /// Equivalent to [`split`], except that the trailing substring
3440 /// is skipped if empty.
///
3442 /// [`split`]: str::split
///
3444 /// This method can be used for string data that is _terminated_,
3445 /// rather than _separated_ by a pattern.
///
3447 /// # Iterator behavior
///
3449 /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
3450 /// allows a reverse search and forward/reverse search yields the same
3451 /// elements. This is true for, e.g., [`char`], but not for `&str`.
///
3453 /// If the pattern allows a reverse search but its results might differ
3454 /// from a forward search, the [`rsplit_terminator`] method can be used.
///
3456 /// [`rsplit_terminator`]: str::rsplit_terminator
///
/// # Examples
///
/// ```
3463 /// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
3464 /// assert_eq!(v, ["A", "B"]);
///
3466 /// let v: Vec<&str> = "A..B..".split_terminator(".").collect();
3467 /// assert_eq!(v, ["A", "", "B", ""]);
/// ```
3469 #[stable(feature = "rust1", since = "1.0.0")]
3471 pub fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> {
3472 SplitTerminator(SplitInternal { allow_trailing_empty: false, ..self.split(pat).0 })
3475 /// An iterator over substrings of `self`, separated by characters
3476 /// matched by a pattern and yielded in reverse order.
///
3478 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3479 /// function or closure that determines if a character matches.
///
3481 /// [pattern]: self::pattern
///
3483 /// Equivalent to [`split`], except that the trailing substring is
3484 /// skipped if empty.
///
3486 /// [`split`]: str::split
///
3488 /// This method can be used for string data that is _terminated_,
3489 /// rather than _separated_ by a pattern.
///
3491 /// # Iterator behavior
///
3493 /// The returned iterator requires that the pattern supports a
3494 /// reverse search, and it will be double ended if a forward/reverse
3495 /// search yields the same elements.
///
3497 /// For iterating from the front, the [`split_terminator`] method can be
/// used.
///
3500 /// [`split_terminator`]: str::split_terminator
///
/// # Examples
///
/// ```
3505 /// let v: Vec<&str> = "A.B.".rsplit_terminator('.').collect();
3506 /// assert_eq!(v, ["B", "A"]);
///
3508 /// let v: Vec<&str> = "A..B..".rsplit_terminator(".").collect();
3509 /// assert_eq!(v, ["", "B", "", "A"]);
/// ```
3511 #[stable(feature = "rust1", since = "1.0.0")]
3513 pub fn rsplit_terminator<'a, P>(&'a self, pat: P) -> RSplitTerminator<'a, P>
3515 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
3517 RSplitTerminator(self.split_terminator(pat).0)
3520 /// An iterator over substrings of the given string slice, separated by a
3521 /// pattern, restricted to returning at most `n` items.
///
3523 /// If `n` substrings are returned, the last substring (the `n`th substring)
3524 /// will contain the remainder of the string.
///
3526 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3527 /// function or closure that determines if a character matches.
///
3529 /// [pattern]: self::pattern
///
3531 /// # Iterator behavior
///
3533 /// The returned iterator will not be double ended, because it is
3534 /// not efficient to support.
///
3536 /// If the pattern allows a reverse search, the [`rsplitn`] method can be
/// used.
///
3539 /// [`rsplitn`]: str::rsplitn
///
/// # Examples
///
3543 /// Simple patterns:
///
/// ```
3546 /// let v: Vec<&str> = "Mary had a little lambda".splitn(3, ' ').collect();
3547 /// assert_eq!(v, ["Mary", "had", "a little lambda"]);
///
3549 /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(3, "X").collect();
3550 /// assert_eq!(v, ["lion", "", "tigerXleopard"]);
///
3552 /// let v: Vec<&str> = "abcXdef".splitn(1, 'X').collect();
3553 /// assert_eq!(v, ["abcXdef"]);
///
3555 /// let v: Vec<&str> = "".splitn(1, 'X').collect();
3556 /// assert_eq!(v, [""]);
/// ```
///
3559 /// A more complex pattern, using a closure:
///
/// ```
3562 /// let v: Vec<&str> = "abc1defXghi".splitn(2, |c| c == '1' || c == 'X').collect();
3563 /// assert_eq!(v, ["abc", "defXghi"]);
/// ```
3565 #[stable(feature = "rust1", since = "1.0.0")]
3567 pub fn splitn<'a, P: Pattern<'a>>(&'a self, n: usize, pat: P) -> SplitN<'a, P> {
3568 SplitN(SplitNInternal { iter: self.split(pat).0, count: n })
3571 /// An iterator over substrings of this string slice, separated by a
3572 /// pattern, starting from the end of the string, restricted to returning
3573 /// at most `n` items.
///
3575 /// If `n` substrings are returned, the last substring (the `n`th substring)
3576 /// will contain the remainder of the string.
///
3578 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3579 /// function or closure that determines if a character matches.
///
3581 /// [pattern]: self::pattern
///
3583 /// # Iterator behavior
///
3585 /// The returned iterator will not be double ended, because it is not
3586 /// efficient to support.
///
3588 /// For splitting from the front, the [`splitn`] method can be used.
///
3590 /// [`splitn`]: str::splitn
///
/// # Examples
///
3594 /// Simple patterns:
///
/// ```
3597 /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(3, ' ').collect();
3598 /// assert_eq!(v, ["lamb", "little", "Mary had a"]);
///
3600 /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(3, 'X').collect();
3601 /// assert_eq!(v, ["leopard", "tiger", "lionX"]);
///
3603 /// let v: Vec<&str> = "lion::tiger::leopard".rsplitn(2, "::").collect();
3604 /// assert_eq!(v, ["leopard", "lion::tiger"]);
/// ```
///
3607 /// A more complex pattern, using a closure:
///
/// ```
3610 /// let v: Vec<&str> = "abc1defXghi".rsplitn(2, |c| c == '1' || c == 'X').collect();
3611 /// assert_eq!(v, ["ghi", "abc1def"]);
/// ```
3613 #[stable(feature = "rust1", since = "1.0.0")]
3615 pub fn rsplitn<'a, P>(&'a self, n: usize, pat: P) -> RSplitN<'a, P>
3617 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
3619 RSplitN(self.splitn(n, pat).0)
3622 /// Splits the string on the first occurrence of the specified delimiter and
3623 /// returns prefix before delimiter and suffix after delimiter.
///
/// Returns [`None`] if the delimiter does not occur in the string.
///
/// # Examples
///
/// ```
3628 /// #![feature(str_split_once)]
///
3630 /// assert_eq!("cfg".split_once('='), None);
3631 /// assert_eq!("cfg=foo".split_once('='), Some(("cfg", "foo")));
3632 /// assert_eq!("cfg=foo=bar".split_once('='), Some(("cfg", "foo=bar")));
/// ```
3634 #[unstable(feature = "str_split_once", reason = "newly added", issue = "74773")]
3636 pub fn split_once<'a, P: Pattern<'a>>(&'a self, delimiter: P) -> Option<(&'a str, &'a str)> {
3637 let (start, end) = delimiter.into_searcher(self).next_match()?;
3638 Some((&self[..start], &self[end..]))
3641 /// Splits the string on the last occurrence of the specified delimiter and
3642 /// returns prefix before delimiter and suffix after delimiter.
///
/// Returns [`None`] if the delimiter does not occur in the string.
///
/// # Examples
///
/// ```
3647 /// #![feature(str_split_once)]
///
3649 /// assert_eq!("cfg".rsplit_once('='), None);
3650 /// assert_eq!("cfg=foo".rsplit_once('='), Some(("cfg", "foo")));
3651 /// assert_eq!("cfg=foo=bar".rsplit_once('='), Some(("cfg=foo", "bar")));
/// ```
3653 #[unstable(feature = "str_split_once", reason = "newly added", issue = "74773")]
3655 pub fn rsplit_once<'a, P>(&'a self, delimiter: P) -> Option<(&'a str, &'a str)>
3657 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
3659 let (start, end) = delimiter.into_searcher(self).next_match_back()?;
3660 Some((&self[..start], &self[end..]))
3663 /// An iterator over the disjoint matches of a pattern within the given string
3666 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3667 /// function or closure that determines if a character matches.
3669 /// [pattern]: self::pattern
3671 /// # Iterator behavior
3673 /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
3674 /// allows a reverse search and forward/reverse search yields the same
3675 /// elements. This is true for, e.g., [`char`], but not for `&str`.
3677 /// If the pattern allows a reverse search but its results might differ
3678 /// from a forward search, the [`rmatches`] method can be used.
/// [`rmatches`]: str::rmatches
3687 /// let v: Vec<&str> = "abcXXXabcYYYabc".matches("abc").collect();
3688 /// assert_eq!(v, ["abc", "abc", "abc"]);
3690 /// let v: Vec<&str> = "1abc2abc3".matches(char::is_numeric).collect();
3691 /// assert_eq!(v, ["1", "2", "3"]);
3693 #[stable(feature = "str_matches", since = "1.2.0")]
3695 pub fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> {
3696 Matches(MatchesInternal(pat.into_searcher(self)))
3699 /// An iterator over the disjoint matches of a pattern within this string slice,
3700 /// yielded in reverse order.
3702 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3703 /// function or closure that determines if a character matches.
3705 /// [pattern]: self::pattern
3707 /// # Iterator behavior
3709 /// The returned iterator requires that the pattern supports a reverse
3710 /// search, and it will be a [`DoubleEndedIterator`] if a forward/reverse
3711 /// search yields the same elements.
3713 /// For iterating from the front, the [`matches`] method can be used.
3715 /// [`matches`]: str::matches
3722 /// let v: Vec<&str> = "abcXXXabcYYYabc".rmatches("abc").collect();
3723 /// assert_eq!(v, ["abc", "abc", "abc"]);
3725 /// let v: Vec<&str> = "1abc2abc3".rmatches(char::is_numeric).collect();
3726 /// assert_eq!(v, ["3", "2", "1"]);
3728 #[stable(feature = "str_matches", since = "1.2.0")]
3730 pub fn rmatches<'a, P>(&'a self, pat: P) -> RMatches<'a, P>
3732 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
3734 RMatches(self.matches(pat).0)
3737 /// An iterator over the disjoint matches of a pattern within this string
3738 /// slice as well as the index that the match starts at.
3740 /// For matches of `pat` within `self` that overlap, only the indices
3741 /// corresponding to the first match are returned.
3743 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3744 /// function or closure that determines if a character matches.
3746 /// [pattern]: self::pattern
3748 /// # Iterator behavior
3750 /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
3751 /// allows a reverse search and forward/reverse search yields the same
3752 /// elements. This is true for, e.g., [`char`], but not for `&str`.
3754 /// If the pattern allows a reverse search but its results might differ
3755 /// from a forward search, the [`rmatch_indices`] method can be used.
/// [`rmatch_indices`]: str::rmatch_indices
3764 /// let v: Vec<_> = "abcXXXabcYYYabc".match_indices("abc").collect();
3765 /// assert_eq!(v, [(0, "abc"), (6, "abc"), (12, "abc")]);
3767 /// let v: Vec<_> = "1abcabc2".match_indices("abc").collect();
3768 /// assert_eq!(v, [(1, "abc"), (4, "abc")]);
3770 /// let v: Vec<_> = "ababa".match_indices("aba").collect();
3771 /// assert_eq!(v, [(0, "aba")]); // only the first `aba`
3773 #[stable(feature = "str_match_indices", since = "1.5.0")]
3775 pub fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> {
3776 MatchIndices(MatchIndicesInternal(pat.into_searcher(self)))
3779 /// An iterator over the disjoint matches of a pattern within `self`,
3780 /// yielded in reverse order along with the index of the match.
3782 /// For matches of `pat` within `self` that overlap, only the indices
3783 /// corresponding to the last match are returned.
3785 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3786 /// function or closure that determines if a character matches.
3788 /// [pattern]: self::pattern
3790 /// # Iterator behavior
3792 /// The returned iterator requires that the pattern supports a reverse
3793 /// search, and it will be a [`DoubleEndedIterator`] if a forward/reverse
3794 /// search yields the same elements.
3796 /// For iterating from the front, the [`match_indices`] method can be used.
3798 /// [`match_indices`]: str::match_indices
3805 /// let v: Vec<_> = "abcXXXabcYYYabc".rmatch_indices("abc").collect();
3806 /// assert_eq!(v, [(12, "abc"), (6, "abc"), (0, "abc")]);
3808 /// let v: Vec<_> = "1abcabc2".rmatch_indices("abc").collect();
3809 /// assert_eq!(v, [(4, "abc"), (1, "abc")]);
3811 /// let v: Vec<_> = "ababa".rmatch_indices("aba").collect();
3812 /// assert_eq!(v, [(2, "aba")]); // only the last `aba`
3814 #[stable(feature = "str_match_indices", since = "1.5.0")]
3816 pub fn rmatch_indices<'a, P>(&'a self, pat: P) -> RMatchIndices<'a, P>
3818 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
3820 RMatchIndices(self.match_indices(pat).0)
3823 /// Returns a string slice with leading and trailing whitespace removed.
3825 /// 'Whitespace' is defined according to the terms of the Unicode Derived
3826 /// Core Property `White_Space`.
3833 /// let s = " Hello\tworld\t";
3835 /// assert_eq!("Hello\tworld", s.trim());
3837 #[must_use = "this returns the trimmed string as a slice, \
3838 without modifying the original"]
3839 #[stable(feature = "rust1", since = "1.0.0")]
3840 pub fn trim(&self) -> &str {
3841 self.trim_matches(|c: char| c.is_whitespace())
3844 /// Returns a string slice with leading whitespace removed.
3846 /// 'Whitespace' is defined according to the terms of the Unicode Derived
3847 /// Core Property `White_Space`.
3849 /// # Text directionality
3851 /// A string is a sequence of bytes. `start` in this context means the first
3852 /// position of that byte string; for a left-to-right language like English or
3853 /// Russian, this will be left side, and for right-to-left languages like
3854 /// Arabic or Hebrew, this will be the right side.
3861 /// let s = " Hello\tworld\t";
3862 /// assert_eq!("Hello\tworld\t", s.trim_start());
3868 /// let s = " English ";
3869 /// assert!(Some('E') == s.trim_start().chars().next());
3871 /// let s = " עברית ";
3872 /// assert!(Some('ע') == s.trim_start().chars().next());
3874 #[must_use = "this returns the trimmed string as a new slice, \
3875 without modifying the original"]
3876 #[stable(feature = "trim_direction", since = "1.30.0")]
3877 pub fn trim_start(&self) -> &str {
3878 self.trim_start_matches(|c: char| c.is_whitespace())
3881 /// Returns a string slice with trailing whitespace removed.
3883 /// 'Whitespace' is defined according to the terms of the Unicode Derived
3884 /// Core Property `White_Space`.
3886 /// # Text directionality
3888 /// A string is a sequence of bytes. `end` in this context means the last
3889 /// position of that byte string; for a left-to-right language like English or
3890 /// Russian, this will be right side, and for right-to-left languages like
3891 /// Arabic or Hebrew, this will be the left side.
3898 /// let s = " Hello\tworld\t";
3899 /// assert_eq!(" Hello\tworld", s.trim_end());
3905 /// let s = " English ";
3906 /// assert!(Some('h') == s.trim_end().chars().rev().next());
3908 /// let s = " עברית ";
3909 /// assert!(Some('ת') == s.trim_end().chars().rev().next());
3911 #[must_use = "this returns the trimmed string as a new slice, \
3912 without modifying the original"]
3913 #[stable(feature = "trim_direction", since = "1.30.0")]
3914 pub fn trim_end(&self) -> &str {
3915 self.trim_end_matches(|c: char| c.is_whitespace())
3918 /// Returns a string slice with leading whitespace removed.
3920 /// 'Whitespace' is defined according to the terms of the Unicode Derived
3921 /// Core Property `White_Space`.
3923 /// # Text directionality
3925 /// A string is a sequence of bytes. 'Left' in this context means the first
3926 /// position of that byte string; for a language like Arabic or Hebrew
3927 /// which are 'right to left' rather than 'left to right', this will be
3928 /// the _right_ side, not the left.
3935 /// let s = " Hello\tworld\t";
3937 /// assert_eq!("Hello\tworld\t", s.trim_left());
3943 /// let s = " English";
3944 /// assert!(Some('E') == s.trim_left().chars().next());
3946 /// let s = " עברית";
3947 /// assert!(Some('ע') == s.trim_left().chars().next());
3949 #[stable(feature = "rust1", since = "1.0.0")]
3952 reason = "superseded by `trim_start`",
3953 suggestion = "trim_start"
3955 pub fn trim_left(&self) -> &str {
3959 /// Returns a string slice with trailing whitespace removed.
3961 /// 'Whitespace' is defined according to the terms of the Unicode Derived
3962 /// Core Property `White_Space`.
3964 /// # Text directionality
3966 /// A string is a sequence of bytes. 'Right' in this context means the last
3967 /// position of that byte string; for a language like Arabic or Hebrew
3968 /// which are 'right to left' rather than 'left to right', this will be
3969 /// the _left_ side, not the right.
3976 /// let s = " Hello\tworld\t";
3978 /// assert_eq!(" Hello\tworld", s.trim_right());
3984 /// let s = "English ";
3985 /// assert!(Some('h') == s.trim_right().chars().rev().next());
3987 /// let s = "עברית ";
3988 /// assert!(Some('ת') == s.trim_right().chars().rev().next());
3990 #[stable(feature = "rust1", since = "1.0.0")]
3993 reason = "superseded by `trim_end`",
3994 suggestion = "trim_end"
3996 pub fn trim_right(&self) -> &str {
4000 /// Returns a string slice with all prefixes and suffixes that match a
4001 /// pattern repeatedly removed.
4003 /// The [pattern] can be a [`char`], a slice of [`char`]s, or a function
4004 /// or closure that determines if a character matches.
4006 /// [pattern]: self::pattern
4010 /// Simple patterns:
4013 /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
4014 /// assert_eq!("123foo1bar123".trim_matches(char::is_numeric), "foo1bar");
4016 /// let x: &[_] = &['1', '2'];
4017 /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar");
4020 /// A more complex pattern, using a closure:
4023 /// assert_eq!("1foo1barXX".trim_matches(|c| c == '1' || c == 'X'), "foo1bar");
4025 #[must_use = "this returns the trimmed string as a new slice, \
4026 without modifying the original"]
4027 #[stable(feature = "rust1", since = "1.0.0")]
4028 pub fn trim_matches<'a, P>(&'a self, pat: P) -> &'a str
4030 P: Pattern<'a, Searcher: DoubleEndedSearcher<'a>>,
4034 let mut matcher = pat.into_searcher(self);
4035 if let Some((a, b)) = matcher.next_reject() {
4037 j = b; // Remember earliest known match, correct it below if
4038 // last match is different
4040 if let Some((_, b)) = matcher.next_reject_back() {
4043 // SAFETY: `Searcher` is known to return valid indices.
4044 unsafe { self.get_unchecked(i..j) }
4047 /// Returns a string slice with all prefixes that match a pattern
4048 /// repeatedly removed.
4050 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
4051 /// function or closure that determines if a character matches.
4053 /// [pattern]: self::pattern
4055 /// # Text directionality
4057 /// A string is a sequence of bytes. `start` in this context means the first
4058 /// position of that byte string; for a left-to-right language like English or
4059 /// Russian, this will be left side, and for right-to-left languages like
4060 /// Arabic or Hebrew, this will be the right side.
4067 /// assert_eq!("11foo1bar11".trim_start_matches('1'), "foo1bar11");
4068 /// assert_eq!("123foo1bar123".trim_start_matches(char::is_numeric), "foo1bar123");
4070 /// let x: &[_] = &['1', '2'];
4071 /// assert_eq!("12foo1bar12".trim_start_matches(x), "foo1bar12");
4073 #[must_use = "this returns the trimmed string as a new slice, \
4074 without modifying the original"]
4075 #[stable(feature = "trim_direction", since = "1.30.0")]
4076 pub fn trim_start_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
4077 let mut i = self.len();
4078 let mut matcher = pat.into_searcher(self);
4079 if let Some((a, _)) = matcher.next_reject() {
4082 // SAFETY: `Searcher` is known to return valid indices.
4083 unsafe { self.get_unchecked(i..self.len()) }
4086 /// Returns a string slice with the prefix removed.
4088 /// If the string starts with the pattern `prefix`, `Some` is returned with the substring where
/// the prefix is removed. Unlike `trim_start_matches`, this method removes the prefix exactly once.
4092 /// If the string does not start with `prefix`, `None` is returned.
4094 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
4095 /// function or closure that determines if a character matches.
4097 /// [pattern]: self::pattern
4102 /// assert_eq!("foo:bar".strip_prefix("foo:"), Some("bar"));
4103 /// assert_eq!("foo:bar".strip_prefix("bar"), None);
4104 /// assert_eq!("foofoo".strip_prefix("foo"), Some("foo"));
4106 #[must_use = "this returns the remaining substring as a new slice, \
4107 without modifying the original"]
4108 #[stable(feature = "str_strip", since = "1.45.0")]
4109 pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a str> {
4110 prefix.strip_prefix_of(self)
4113 /// Returns a string slice with the suffix removed.
4115 /// If the string ends with the pattern `suffix`, `Some` is returned with the substring where
/// the suffix is removed. Unlike `trim_end_matches`, this method removes the suffix exactly once.
4119 /// If the string does not end with `suffix`, `None` is returned.
4121 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
4122 /// function or closure that determines if a character matches.
4124 /// [pattern]: self::pattern
4129 /// assert_eq!("bar:foo".strip_suffix(":foo"), Some("bar"));
4130 /// assert_eq!("bar:foo".strip_suffix("bar"), None);
4131 /// assert_eq!("foofoo".strip_suffix("foo"), Some("foo"));
4133 #[must_use = "this returns the remaining substring as a new slice, \
4134 without modifying the original"]
4135 #[stable(feature = "str_strip", since = "1.45.0")]
4136 pub fn strip_suffix<'a, P>(&'a self, suffix: P) -> Option<&'a str>
4139 <P as Pattern<'a>>::Searcher: ReverseSearcher<'a>,
4141 suffix.strip_suffix_of(self)
4144 /// Returns a string slice with all suffixes that match a pattern
4145 /// repeatedly removed.
4147 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
4148 /// function or closure that determines if a character matches.
4150 /// [pattern]: self::pattern
4152 /// # Text directionality
4154 /// A string is a sequence of bytes. `end` in this context means the last
4155 /// position of that byte string; for a left-to-right language like English or
4156 /// Russian, this will be right side, and for right-to-left languages like
4157 /// Arabic or Hebrew, this will be the left side.
4161 /// Simple patterns:
4164 /// assert_eq!("11foo1bar11".trim_end_matches('1'), "11foo1bar");
4165 /// assert_eq!("123foo1bar123".trim_end_matches(char::is_numeric), "123foo1bar");
4167 /// let x: &[_] = &['1', '2'];
4168 /// assert_eq!("12foo1bar12".trim_end_matches(x), "12foo1bar");
4171 /// A more complex pattern, using a closure:
4174 /// assert_eq!("1fooX".trim_end_matches(|c| c == '1' || c == 'X'), "1foo");
4176 #[must_use = "this returns the trimmed string as a new slice, \
4177 without modifying the original"]
4178 #[stable(feature = "trim_direction", since = "1.30.0")]
4179 pub fn trim_end_matches<'a, P>(&'a self, pat: P) -> &'a str
4181 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
4184 let mut matcher = pat.into_searcher(self);
4185 if let Some((_, b)) = matcher.next_reject_back() {
4188 // SAFETY: `Searcher` is known to return valid indices.
4189 unsafe { self.get_unchecked(0..j) }
4192 /// Returns a string slice with all prefixes that match a pattern
4193 /// repeatedly removed.
4195 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
4196 /// function or closure that determines if a character matches.
4198 /// [pattern]: self::pattern
4200 /// # Text directionality
4202 /// A string is a sequence of bytes. 'Left' in this context means the first
4203 /// position of that byte string; for a language like Arabic or Hebrew
4204 /// which are 'right to left' rather than 'left to right', this will be
4205 /// the _right_ side, not the left.
4212 /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
4213 /// assert_eq!("123foo1bar123".trim_left_matches(char::is_numeric), "foo1bar123");
4215 /// let x: &[_] = &['1', '2'];
4216 /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12");
4218 #[stable(feature = "rust1", since = "1.0.0")]
4221 reason = "superseded by `trim_start_matches`",
4222 suggestion = "trim_start_matches"
4224 pub fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
4225 self.trim_start_matches(pat)
4228 /// Returns a string slice with all suffixes that match a pattern
4229 /// repeatedly removed.
4231 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
4232 /// function or closure that determines if a character matches.
4234 /// [pattern]: self::pattern
4236 /// # Text directionality
4238 /// A string is a sequence of bytes. 'Right' in this context means the last
4239 /// position of that byte string; for a language like Arabic or Hebrew
4240 /// which are 'right to left' rather than 'left to right', this will be
4241 /// the _left_ side, not the right.
4245 /// Simple patterns:
4248 /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
4249 /// assert_eq!("123foo1bar123".trim_right_matches(char::is_numeric), "123foo1bar");
4251 /// let x: &[_] = &['1', '2'];
4252 /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar");
4255 /// A more complex pattern, using a closure:
4258 /// assert_eq!("1fooX".trim_right_matches(|c| c == '1' || c == 'X'), "1foo");
4260 #[stable(feature = "rust1", since = "1.0.0")]
4263 reason = "superseded by `trim_end_matches`",
4264 suggestion = "trim_end_matches"
4266 pub fn trim_right_matches<'a, P>(&'a self, pat: P) -> &'a str
4268 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
4270 self.trim_end_matches(pat)
4273 /// Parses this string slice into another type.
4275 /// Because `parse` is so general, it can cause problems with type
4276 /// inference. As such, `parse` is one of the few times you'll see
4277 /// the syntax affectionately known as the 'turbofish': `::<>`. This
4278 /// helps the inference algorithm understand specifically which type
4279 /// you're trying to parse into.
4281 /// `parse` can parse any type that implements the [`FromStr`] trait.
4286 /// Will return [`Err`] if it's not possible to parse this string slice into
4287 /// the desired type.
4289 /// [`Err`]: FromStr::Err
4296 /// let four: u32 = "4".parse().unwrap();
4298 /// assert_eq!(4, four);
4301 /// Using the 'turbofish' instead of annotating `four`:
4304 /// let four = "4".parse::<u32>();
4306 /// assert_eq!(Ok(4), four);
4309 /// Failing to parse:
4312 /// let nope = "j".parse::<u32>();
4314 /// assert!(nope.is_err());
4317 #[stable(feature = "rust1", since = "1.0.0")]
4318 pub fn parse<F: FromStr>(&self) -> Result<F, F::Err> {
4319 FromStr::from_str(self)
4322 /// Checks if all characters in this string are within the ASCII range.
4327 /// let ascii = "hello!\n";
4328 /// let non_ascii = "Grüße, Jürgen ❤";
4330 /// assert!(ascii.is_ascii());
4331 /// assert!(!non_ascii.is_ascii());
4333 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
4335 pub fn is_ascii(&self) -> bool {
4336 // We can treat each byte as character here: all multibyte characters
4337 // start with a byte that is not in the ascii range, so we will stop
4339 self.as_bytes().is_ascii()
4342 /// Checks that two strings are an ASCII case-insensitive match.
4344 /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
4345 /// but without allocating and copying temporaries.
4350 /// assert!("Ferris".eq_ignore_ascii_case("FERRIS"));
4351 /// assert!("Ferrös".eq_ignore_ascii_case("FERRöS"));
4352 /// assert!(!"Ferrös".eq_ignore_ascii_case("FERRÖS"));
4354 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
4356 pub fn eq_ignore_ascii_case(&self, other: &str) -> bool {
4357 self.as_bytes().eq_ignore_ascii_case(other.as_bytes())
4360 /// Converts this string to its ASCII upper case equivalent in-place.
4362 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
4363 /// but non-ASCII letters are unchanged.
4365 /// To return a new uppercased value without modifying the existing one, use
4366 /// [`to_ascii_uppercase`].
4368 /// [`to_ascii_uppercase`]: #method.to_ascii_uppercase
4373 /// let mut s = String::from("Grüße, Jürgen ❤");
4375 /// s.make_ascii_uppercase();
4377 /// assert_eq!("GRüßE, JüRGEN ❤", s);
4379 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
4380 pub fn make_ascii_uppercase(&mut self) {
4381 // SAFETY: safe because we transmute two types with the same layout.
4382 let me = unsafe { self.as_bytes_mut() };
4383 me.make_ascii_uppercase()
4386 /// Converts this string to its ASCII lower case equivalent in-place.
4388 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
4389 /// but non-ASCII letters are unchanged.
4391 /// To return a new lowercased value without modifying the existing one, use
4392 /// [`to_ascii_lowercase`].
4394 /// [`to_ascii_lowercase`]: #method.to_ascii_lowercase
4399 /// let mut s = String::from("GRÜßE, JÜRGEN ❤");
4401 /// s.make_ascii_lowercase();
4403 /// assert_eq!("grÜße, jÜrgen ❤", s);
4405 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
4406 pub fn make_ascii_lowercase(&mut self) {
4407 // SAFETY: safe because we transmute two types with the same layout.
4408 let me = unsafe { self.as_bytes_mut() };
4409 me.make_ascii_lowercase()
4412 /// Return an iterator that escapes each char in `self` with [`char::escape_debug`].
/// Note: only extended grapheme codepoints that begin the string will be escaped.
4422 /// for c in "❤\n!".escape_debug() {
4423 /// print!("{}", c);
4428 /// Using `println!` directly:
4431 /// println!("{}", "❤\n!".escape_debug());
4435 /// Both are equivalent to:
4438 /// println!("❤\\n!");
4441 /// Using `to_string`:
4444 /// assert_eq!("❤\n!".escape_debug().to_string(), "❤\\n!");
4446 #[stable(feature = "str_escape", since = "1.34.0")]
4447 pub fn escape_debug(&self) -> EscapeDebug<'_> {
4448 let mut chars = self.chars();
4452 .map(|first| first.escape_debug_ext(true))
4455 .chain(chars.flat_map(CharEscapeDebugContinue)),
4459 /// Return an iterator that escapes each char in `self` with [`char::escape_default`].
4466 /// for c in "❤\n!".escape_default() {
4467 /// print!("{}", c);
4472 /// Using `println!` directly:
4475 /// println!("{}", "❤\n!".escape_default());
4479 /// Both are equivalent to:
4482 /// println!("\\u{{2764}}\\n!");
4485 /// Using `to_string`:
4488 /// assert_eq!("❤\n!".escape_default().to_string(), "\\u{2764}\\n!");
4490 #[stable(feature = "str_escape", since = "1.34.0")]
4491 pub fn escape_default(&self) -> EscapeDefault<'_> {
4492 EscapeDefault { inner: self.chars().flat_map(CharEscapeDefault) }
4495 /// Return an iterator that escapes each char in `self` with [`char::escape_unicode`].
4502 /// for c in "❤\n!".escape_unicode() {
4503 /// print!("{}", c);
4508 /// Using `println!` directly:
4511 /// println!("{}", "❤\n!".escape_unicode());
4515 /// Both are equivalent to:
4518 /// println!("\\u{{2764}}\\u{{a}}\\u{{21}}");
4521 /// Using `to_string`:
4524 /// assert_eq!("❤\n!".escape_unicode().to_string(), "\\u{2764}\\u{a}\\u{21}");
4526 #[stable(feature = "str_escape", since = "1.34.0")]
4527 pub fn escape_unicode(&self) -> EscapeUnicode<'_> {
4528 EscapeUnicode { inner: self.chars().flat_map(CharEscapeUnicode) }
4534 struct CharEscapeDebugContinue impl Fn = |c: char| -> char::EscapeDebug {
4535 c.escape_debug_ext(false)
4539 struct CharEscapeUnicode impl Fn = |c: char| -> char::EscapeUnicode {
4543 struct CharEscapeDefault impl Fn = |c: char| -> char::EscapeDefault {
4548 #[stable(feature = "rust1", since = "1.0.0")]
4549 impl AsRef<[u8]> for str {
4551 fn as_ref(&self) -> &[u8] {
4556 #[stable(feature = "rust1", since = "1.0.0")]
4557 impl Default for &str {
4558 /// Creates an empty str
4559 fn default() -> Self {
4564 #[stable(feature = "default_mut_str", since = "1.28.0")]
4565 impl Default for &mut str {
4566 /// Creates an empty mutable str
4567 fn default() -> Self {
4568 // SAFETY: The empty string is valid UTF-8.
4569 unsafe { from_utf8_unchecked_mut(&mut []) }
4573 /// An iterator over the non-whitespace substrings of a string,
4574 /// separated by any amount of whitespace.
4576 /// This struct is created by the [`split_whitespace`] method on [`str`].
4577 /// See its documentation for more.
4579 /// [`split_whitespace`]: str::split_whitespace
4580 #[stable(feature = "split_whitespace", since = "1.1.0")]
4581 #[derive(Clone, Debug)]
4582 pub struct SplitWhitespace<'a> {
4583 inner: Filter<Split<'a, IsWhitespace>, IsNotEmpty>,
4586 /// An iterator over the non-ASCII-whitespace substrings of a string,
4587 /// separated by any amount of ASCII whitespace.
4589 /// This struct is created by the [`split_ascii_whitespace`] method on [`str`].
4590 /// See its documentation for more.
4592 /// [`split_ascii_whitespace`]: str::split_ascii_whitespace
4593 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
4594 #[derive(Clone, Debug)]
4595 pub struct SplitAsciiWhitespace<'a> {
4596 inner: Map<Filter<SliceSplit<'a, u8, IsAsciiWhitespace>, BytesIsNotEmpty>, UnsafeBytesToStr>,
/// An iterator over the substrings of a string, each terminated by a
/// substring matching a predicate function. Unlike `Split`, it keeps the
/// matched part as the terminator of the subslice.
4604 /// This struct is created by the [`split_inclusive`] method on [`str`].
4605 /// See its documentation for more.
4607 /// [`split_inclusive`]: str::split_inclusive
4608 #[unstable(feature = "split_inclusive", issue = "72360")]
4609 pub struct SplitInclusive<'a, P: Pattern<'a>>(SplitInternal<'a, P>);
4613 struct IsWhitespace impl Fn = |c: char| -> bool {
4618 struct IsAsciiWhitespace impl Fn = |byte: &u8| -> bool {
4619 byte.is_ascii_whitespace()
4623 struct IsNotEmpty impl<'a, 'b> Fn = |s: &'a &'b str| -> bool {
4628 struct BytesIsNotEmpty impl<'a, 'b> Fn = |s: &'a &'b [u8]| -> bool {
4633 struct UnsafeBytesToStr impl<'a> Fn = |bytes: &'a [u8]| -> &'a str {
4635 unsafe { from_utf8_unchecked(bytes) }
4639 #[stable(feature = "split_whitespace", since = "1.1.0")]
4640 impl<'a> Iterator for SplitWhitespace<'a> {
4641 type Item = &'a str;
4644 fn next(&mut self) -> Option<&'a str> {
4649 fn size_hint(&self) -> (usize, Option<usize>) {
4650 self.inner.size_hint()
4654 fn last(mut self) -> Option<&'a str> {
4659 #[stable(feature = "split_whitespace", since = "1.1.0")]
4660 impl<'a> DoubleEndedIterator for SplitWhitespace<'a> {
4662 fn next_back(&mut self) -> Option<&'a str> {
4663 self.inner.next_back()
4667 #[stable(feature = "fused", since = "1.26.0")]
4668 impl FusedIterator for SplitWhitespace<'_> {}
4670 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
4671 impl<'a> Iterator for SplitAsciiWhitespace<'a> {
4672 type Item = &'a str;
4675 fn next(&mut self) -> Option<&'a str> {
4680 fn size_hint(&self) -> (usize, Option<usize>) {
4681 self.inner.size_hint()
4685 fn last(mut self) -> Option<&'a str> {
4690 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
4691 impl<'a> DoubleEndedIterator for SplitAsciiWhitespace<'a> {
4693 fn next_back(&mut self) -> Option<&'a str> {
4694 self.inner.next_back()
4698 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
4699 impl FusedIterator for SplitAsciiWhitespace<'_> {}
4701 #[unstable(feature = "split_inclusive", issue = "72360")]
4702 impl<'a, P: Pattern<'a>> Iterator for SplitInclusive<'a, P> {
4703 type Item = &'a str;
4706 fn next(&mut self) -> Option<&'a str> {
4707 self.0.next_inclusive()
4711 #[unstable(feature = "split_inclusive", issue = "72360")]
4712 impl<'a, P: Pattern<'a, Searcher: fmt::Debug>> fmt::Debug for SplitInclusive<'a, P> {
4713 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4714 f.debug_struct("SplitInclusive").field("0", &self.0).finish()
4718 // FIXME(#26925) Remove in favor of `#[derive(Clone)]`
4719 #[unstable(feature = "split_inclusive", issue = "72360")]
4720 impl<'a, P: Pattern<'a, Searcher: Clone>> Clone for SplitInclusive<'a, P> {
4721 fn clone(&self) -> Self {
4722 SplitInclusive(self.0.clone())
4726 #[unstable(feature = "split_inclusive", issue = "72360")]
4727 impl<'a, P: Pattern<'a, Searcher: ReverseSearcher<'a>>> DoubleEndedIterator
4728 for SplitInclusive<'a, P>
4731 fn next_back(&mut self) -> Option<&'a str> {
4732 self.0.next_back_inclusive()
4736 #[unstable(feature = "split_inclusive", issue = "72360")]
4737 impl<'a, P: Pattern<'a>> FusedIterator for SplitInclusive<'a, P> {}
4739 /// An iterator of [`u16`] over the string encoded as UTF-16.
4741 /// This struct is created by the [`encode_utf16`] method on [`str`].
4742 /// See its documentation for more.
4744 /// [`encode_utf16`]: str::encode_utf16
4746 #[stable(feature = "encode_utf16", since = "1.8.0")]
4747 pub struct EncodeUtf16<'a> {
4752 #[stable(feature = "collection_debug", since = "1.17.0")]
4753 impl fmt::Debug for EncodeUtf16<'_> {
4754 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4755 f.pad("EncodeUtf16 { .. }")
4759 #[stable(feature = "encode_utf16", since = "1.8.0")]
4760 impl<'a> Iterator for EncodeUtf16<'a> {
4764 fn next(&mut self) -> Option<u16> {
4765 if self.extra != 0 {
4766 let tmp = self.extra;
4771 let mut buf = [0; 2];
4772 self.chars.next().map(|ch| {
4773 let n = ch.encode_utf16(&mut buf).len();
4775 self.extra = buf[1];
4782 fn size_hint(&self) -> (usize, Option<usize>) {
4783 let (low, high) = self.chars.size_hint();
4784 // every char gets either one u16 or two u16,
4785 // so this iterator is between 1 or 2 times as
4786 // long as the underlying iterator.
4787 (low, high.and_then(|n| n.checked_mul(2)))
4791 #[stable(feature = "fused", since = "1.26.0")]
4792 impl FusedIterator for EncodeUtf16<'_> {}
4794 /// The return type of [`str::escape_debug`].
4795 #[stable(feature = "str_escape", since = "1.34.0")]
4796 #[derive(Clone, Debug)]
4797 pub struct EscapeDebug<'a> {
4799 Flatten<option::IntoIter<char::EscapeDebug>>,
4800 FlatMap<Chars<'a>, char::EscapeDebug, CharEscapeDebugContinue>,
4804 /// The return type of [`str::escape_default`].
4805 #[stable(feature = "str_escape", since = "1.34.0")]
4806 #[derive(Clone, Debug)]
4807 pub struct EscapeDefault<'a> {
4808 inner: FlatMap<Chars<'a>, char::EscapeDefault, CharEscapeDefault>,
4811 /// The return type of [`str::escape_unicode`].
4812 #[stable(feature = "str_escape", since = "1.34.0")]
4813 #[derive(Clone, Debug)]
4814 pub struct EscapeUnicode<'a> {
4815 inner: FlatMap<Chars<'a>, char::EscapeUnicode, CharEscapeUnicode>,
4818 macro_rules! escape_types_impls {
4819 ($( $Name: ident ),+) => {$(
4820 #[stable(feature = "str_escape", since = "1.34.0")]
4821 impl<'a> fmt::Display for $Name<'a> {
4822 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4823 self.clone().try_for_each(|c| f.write_char(c))
4827 #[stable(feature = "str_escape", since = "1.34.0")]
4828 impl<'a> Iterator for $Name<'a> {
4832 fn next(&mut self) -> Option<char> { self.inner.next() }
4835 fn size_hint(&self) -> (usize, Option<usize>) { self.inner.size_hint() }
4838 fn try_fold<Acc, Fold, R>(&mut self, init: Acc, fold: Fold) -> R where
4839 Self: Sized, Fold: FnMut(Acc, Self::Item) -> R, R: Try<Ok=Acc>
4841 self.inner.try_fold(init, fold)
4845 fn fold<Acc, Fold>(self, init: Acc, fold: Fold) -> Acc
4846 where Fold: FnMut(Acc, Self::Item) -> Acc,
4848 self.inner.fold(init, fold)
4852 #[stable(feature = "str_escape", since = "1.34.0")]
4853 impl<'a> FusedIterator for $Name<'a> {}
4857 escape_types_impls!(EscapeDebug, EscapeDefault, EscapeUnicode);