1 // ignore-tidy-filelength
3 //! String manipulation.
5 //! For more details, see the [`std::str`] module.
9 #![stable(feature = "rust1", since = "1.0.0")]
11 use self::pattern::Pattern;
12 use self::pattern::{DoubleEndedSearcher, ReverseSearcher, Searcher};
15 use crate::fmt::{self, Write};
16 use crate::iter::{Chain, FlatMap, Flatten};
17 use crate::iter::{Copied, Filter, FusedIterator, Map, TrustedLen, TrustedRandomAccess};
21 use crate::slice::{self, SliceIndex, Split as SliceSplit};
25 #[unstable(feature = "str_internals", issue = "none")]
26 #[allow(missing_docs)]
29 /// Parse a value from a string.
31 /// `FromStr`'s [`from_str`] method is often used implicitly, through
32 /// [`str`]'s [`parse`] method. See [`parse`]'s documentation for examples.
34 /// [`from_str`]: FromStr::from_str
35 /// [`parse`]: str::parse
37 /// `FromStr` does not have a lifetime parameter, and so you can only parse types
38 /// that do not contain a lifetime parameter themselves. In other words, you can
39 /// parse an `i32` with `FromStr`, but not a `&i32`. You can parse a struct that
40 /// contains an `i32`, but not one that contains an `&i32`.
44 /// Basic implementation of `FromStr` on an example `Point` type:
47 /// use std::str::FromStr;
48 /// use std::num::ParseIntError;
50 /// #[derive(Debug, PartialEq)]
56 /// impl FromStr for Point {
57 /// type Err = ParseIntError;
59 /// fn from_str(s: &str) -> Result<Self, Self::Err> {
60 /// let coords: Vec<&str> = s.trim_matches(|p| p == '(' || p == ')' )
64 /// let x_fromstr = coords[0].parse::<i32>()?;
65 /// let y_fromstr = coords[1].parse::<i32>()?;
67 /// Ok(Point { x: x_fromstr, y: y_fromstr })
71 /// let p = Point::from_str("(1,2)");
72 /// assert_eq!(p.unwrap(), Point{ x: 1, y: 2} )
74 #[stable(feature = "rust1", since = "1.0.0")]
75 pub trait FromStr: Sized {
76 /// The associated error which can be returned from parsing.
77 #[stable(feature = "rust1", since = "1.0.0")]
80 /// Parses a string `s` to return a value of this type.
82 /// If parsing succeeds, return the value inside [`Ok`]; otherwise,
83 /// when the string is ill-formatted, return an error
84 /// inside [`Err`]. The error type is specific to the implementation of the trait.
86 /// [`Ok`]: ../../std/result/enum.Result.html#variant.Ok
87 /// [`Err`]: ../../std/result/enum.Result.html#variant.Err
91 /// Basic usage with [`i32`][ithirtytwo], a type that implements `FromStr`:
93 /// [ithirtytwo]: ../../std/primitive.i32.html
96 /// use std::str::FromStr;
99 /// let x = i32::from_str(s).unwrap();
101 /// assert_eq!(5, x);
103 #[stable(feature = "rust1", since = "1.0.0")]
104 fn from_str(s: &str) -> Result<Self, Self::Err>;
107 #[stable(feature = "rust1", since = "1.0.0")]
108 impl FromStr for bool {
109 type Err = ParseBoolError;
111 /// Parse a `bool` from a string.
113 /// Yields a `Result<bool, ParseBoolError>`, because `s` may or may not
114 /// actually be parseable.
119 /// use std::str::FromStr;
121 /// assert_eq!(FromStr::from_str("true"), Ok(true));
122 /// assert_eq!(FromStr::from_str("false"), Ok(false));
123 /// assert!(<bool as FromStr>::from_str("not even a boolean").is_err());
126 /// Note that in many cases the `.parse()` method on `str` is more idiomatic.
129 /// assert_eq!("true".parse(), Ok(true));
130 /// assert_eq!("false".parse(), Ok(false));
131 /// assert!("not even a boolean".parse::<bool>().is_err());
134 fn from_str(s: &str) -> Result<bool, ParseBoolError> {
137 "false" => Ok(false),
138 _ => Err(ParseBoolError { _priv: () }),
143 /// An error returned when parsing a `bool` using [`from_str`] fails.
145 /// [`from_str`]: FromStr::from_str
146 #[derive(Debug, Clone, PartialEq, Eq)]
147 #[stable(feature = "rust1", since = "1.0.0")]
148 pub struct ParseBoolError {
152 #[stable(feature = "rust1", since = "1.0.0")]
153 impl fmt::Display for ParseBoolError {
154 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
155 "provided string was not `true` or `false`".fmt(f)
160 Section: Creating a string
163 /// Errors which can occur when attempting to interpret a sequence of [`u8`]
166 /// As such, the `from_utf8` family of functions and methods for both [`String`]s
167 /// and [`&str`]s make use of this error, for example.
169 /// [`String`]: ../../std/string/struct.String.html#method.from_utf8
170 /// [`&str`]: from_utf8
174 /// This error type’s methods can be used to create functionality
175 /// similar to `String::from_utf8_lossy` without allocating heap memory:
178 /// fn from_utf8_lossy<F>(mut input: &[u8], mut push: F) where F: FnMut(&str) {
180 /// match std::str::from_utf8(input) {
186 /// let (valid, after_valid) = input.split_at(error.valid_up_to());
188 /// push(std::str::from_utf8_unchecked(valid))
190 /// push("\u{FFFD}");
192 /// if let Some(invalid_sequence_length) = error.error_len() {
193 /// input = &after_valid[invalid_sequence_length..]
202 #[derive(Copy, Eq, PartialEq, Clone, Debug)]
203 #[stable(feature = "rust1", since = "1.0.0")]
204 pub struct Utf8Error {
206 error_len: Option<u8>,
210 /// Returns the index in the given string up to which valid UTF-8 was
213 /// It is the maximum index such that `from_utf8(&input[..index])`
214 /// would return `Ok(_)`.
223 /// // some invalid bytes, in a vector
224 /// let sparkle_heart = vec![0, 159, 146, 150];
226 /// // std::str::from_utf8 returns a Utf8Error
227 /// let error = str::from_utf8(&sparkle_heart).unwrap_err();
229 /// // the second byte is invalid here
230 /// assert_eq!(1, error.valid_up_to());
232 #[stable(feature = "utf8_error", since = "1.5.0")]
233 pub fn valid_up_to(&self) -> usize {
237 /// Provides more information about the failure:
239 /// * `None`: the end of the input was reached unexpectedly.
240 /// `self.valid_up_to()` is 1 to 3 bytes from the end of the input.
241 /// If a byte stream (such as a file or a network socket) is being decoded incrementally,
242 /// this could be a valid `char` whose UTF-8 byte sequence is spanning multiple chunks.
244 /// * `Some(len)`: an unexpected byte was encountered.
245 /// The length provided is that of the invalid byte sequence
246 /// that starts at the index given by `valid_up_to()`.
247 /// Decoding should resume after that sequence
248 /// (after inserting a [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD]) in case of
251 /// [U+FFFD]: ../../std/char/constant.REPLACEMENT_CHARACTER.html
252 #[stable(feature = "utf8_error_error_len", since = "1.20.0")]
253 pub fn error_len(&self) -> Option<usize> {
254 self.error_len.map(|len| len as usize)
258 /// Converts a slice of bytes to a string slice.
260 /// A string slice ([`&str`]) is made of bytes ([`u8`]), and a byte slice
261 /// ([`&[u8]`][byteslice]) is made of bytes, so this function converts between
262 /// the two. Not all byte slices are valid string slices, however: [`&str`] requires
263 /// that it is valid UTF-8. `from_utf8()` checks to ensure that the bytes are valid
264 /// UTF-8, and then does the conversion.
267 /// [byteslice]: ../../std/primitive.slice.html
269 /// If you are sure that the byte slice is valid UTF-8, and you don't want to
270 /// incur the overhead of the validity check, there is an unsafe version of
271 /// this function, [`from_utf8_unchecked`][fromutf8u], which has the same
272 /// behavior but skips the check.
274 /// [fromutf8u]: fn.from_utf8_unchecked.html
276 /// If you need a `String` instead of a `&str`, consider
277 /// [`String::from_utf8`][string].
279 /// [string]: ../../std/string/struct.String.html#method.from_utf8
281 /// Because you can stack-allocate a `[u8; N]`, and you can take a
282 /// [`&[u8]`][byteslice] of it, this function is one way to have a
283 /// stack-allocated string. There is an example of this in the
284 /// examples section below.
286 /// [byteslice]: ../../std/primitive.slice.html
290 /// Returns `Err` if the slice is not UTF-8 with a description as to why the
291 /// provided slice is not UTF-8.
300 /// // some bytes, in a vector
301 /// let sparkle_heart = vec![240, 159, 146, 150];
303 /// // We know these bytes are valid, so just use `unwrap()`.
304 /// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
306 /// assert_eq!("💖", sparkle_heart);
314 /// // some invalid bytes, in a vector
315 /// let sparkle_heart = vec![0, 159, 146, 150];
317 /// assert!(str::from_utf8(&sparkle_heart).is_err());
320 /// See the docs for [`Utf8Error`][error] for more details on the kinds of
321 /// errors that can be returned.
323 /// [error]: struct.Utf8Error.html
325 /// A "stack allocated string":
330 /// // some bytes, in a stack-allocated array
331 /// let sparkle_heart = [240, 159, 146, 150];
333 /// // We know these bytes are valid, so just use `unwrap()`.
334 /// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
336 /// assert_eq!("💖", sparkle_heart);
338 #[stable(feature = "rust1", since = "1.0.0")]
339 pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
340 run_utf8_validation(v)?;
341 // SAFETY: validation succeeded above (`?` returns early on error), so `v` is valid UTF-8.
342 Ok(unsafe { from_utf8_unchecked(v) })
345 /// Converts a mutable slice of bytes to a mutable string slice.
354 /// // "Hello, Rust!" as a mutable vector
355 /// let mut hellorust = vec![72, 101, 108, 108, 111, 44, 32, 82, 117, 115, 116, 33];
357 /// // As we know these bytes are valid, we can use `unwrap()`
358 /// let outstr = str::from_utf8_mut(&mut hellorust).unwrap();
360 /// assert_eq!("Hello, Rust!", outstr);
368 /// // Some invalid bytes in a mutable vector
369 /// let mut invalid = vec![128, 223];
371 /// assert!(str::from_utf8_mut(&mut invalid).is_err());
373 /// See the docs for [`Utf8Error`][error] for more details on the kinds of
374 /// errors that can be returned.
376 /// [error]: struct.Utf8Error.html
377 #[stable(feature = "str_mut_extras", since = "1.20.0")]
378 pub fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
379 run_utf8_validation(v)?;
380 // SAFETY: validation succeeded above (`?` returns early on error), so `v` is valid UTF-8.
381 Ok(unsafe { from_utf8_unchecked_mut(v) })
384 /// Converts a slice of bytes to a string slice without checking
385 /// that the string contains valid UTF-8.
387 /// See the safe version, [`from_utf8`][fromutf8], for more information.
389 /// [fromutf8]: fn.from_utf8.html
393 /// This function is unsafe because it does not check that the bytes passed to
394 /// it are valid UTF-8. If this constraint is violated, undefined behavior
395 /// results, as the rest of Rust assumes that [`&str`]s are valid UTF-8.
406 /// // some bytes, in a vector
407 /// let sparkle_heart = vec![240, 159, 146, 150];
409 /// let sparkle_heart = unsafe {
410 /// str::from_utf8_unchecked(&sparkle_heart)
413 /// assert_eq!("💖", sparkle_heart);
416 #[stable(feature = "rust1", since = "1.0.0")]
417 #[rustc_const_unstable(feature = "const_str_from_utf8_unchecked", issue = "75196")]
418 #[allow(unused_attributes)]
419 #[allow_internal_unstable(const_fn_transmute)]
420 pub const unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
421 // SAFETY: the caller must guarantee that the bytes `v` are valid UTF-8.
422 // Also relies on `&str` and `&[u8]` having the same layout.
423 unsafe { mem::transmute(v) }
426 /// Converts a slice of bytes to a string slice without checking
427 /// that the string contains valid UTF-8; mutable version.
429 /// See the immutable version, [`from_utf8_unchecked()`], for more information.
438 /// let mut heart = vec![240, 159, 146, 150];
439 /// let heart = unsafe { str::from_utf8_unchecked_mut(&mut heart) };
441 /// assert_eq!("💖", heart);
444 #[stable(feature = "str_mut_extras", since = "1.20.0")]
445 pub unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str {
446 // SAFETY: the caller must guarantee that the bytes `v`
447 // are valid UTF-8, thus the cast to `*mut str` is safe.
448 // Also, the pointer dereference is safe because that pointer
449 // comes from a reference which is guaranteed to be valid for writes.
450 unsafe { &mut *(v as *mut [u8] as *mut str) }
453 #[stable(feature = "rust1", since = "1.0.0")]
454 impl fmt::Display for Utf8Error {
455 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
456 if let Some(error_len) = self.error_len {
459 "invalid utf-8 sequence of {} bytes from index {}",
460 error_len, self.valid_up_to
463 write!(f, "incomplete utf-8 byte sequence from index {}", self.valid_up_to)
472 /// An iterator over the [`char`]s of a string slice.
475 /// This struct is created by the [`chars`] method on [`str`].
476 /// See its documentation for more.
478 /// [`chars`]: str::chars
480 #[stable(feature = "rust1", since = "1.0.0")]
481 pub struct Chars<'a> {
482 iter: slice::Iter<'a, u8>,
485 /// Returns the initial codepoint accumulator for the first byte.
486 /// The first byte is special, only want bottom 5 bits for width 2, 4 bits
487 /// for width 3, and 3 bits for width 4.
489 fn utf8_first_byte(byte: u8, width: u32) -> u32 {
490 (byte & (0x7F >> width)) as u32
493 /// Returns the value of `ch` updated with continuation byte `byte`.
495 fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
496 (ch << 6) | (byte & CONT_MASK) as u32
499 /// Checks whether the byte is a UTF-8 continuation byte (i.e., starts with the
502 fn utf8_is_cont_byte(byte: u8) -> bool {
503 (byte & !CONT_MASK) == TAG_CONT_U8
507 fn unwrap_or_0(opt: Option<&u8>) -> u8 {
514 /// Reads the next code point out of a byte iterator (assuming a
515 /// UTF-8-like encoding).
516 #[unstable(feature = "str_internals", issue = "none")]
518 pub fn next_code_point<'a, I: Iterator<Item = &'a u8>>(bytes: &mut I) -> Option<u32> {
520 let x = *bytes.next()?;
522 return Some(x as u32);
525 // Multibyte case follows
526 // Decode from a byte combination out of: [[[x y] z] w]
527 // NOTE: Performance is sensitive to the exact formulation here
528 let init = utf8_first_byte(x, 2);
529 let y = unwrap_or_0(bytes.next());
530 let mut ch = utf8_acc_cont_byte(init, y);
533 // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
534 let z = unwrap_or_0(bytes.next());
535 let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
536 ch = init << 12 | y_z;
539 // use only the lower 3 bits of `init`
540 let w = unwrap_or_0(bytes.next());
541 ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
548 /// Reads the last code point out of a byte iterator (assuming a
549 /// UTF-8-like encoding).
551 fn next_code_point_reverse<'a, I>(bytes: &mut I) -> Option<u32>
553 I: DoubleEndedIterator<Item = &'a u8>,
556 let w = match *bytes.next_back()? {
557 next_byte if next_byte < 128 => return Some(next_byte as u32),
558 back_byte => back_byte,
561 // Multibyte case follows
562 // Decode from a byte combination out of: [x [y [z w]]]
564 let z = unwrap_or_0(bytes.next_back());
565 ch = utf8_first_byte(z, 2);
566 if utf8_is_cont_byte(z) {
567 let y = unwrap_or_0(bytes.next_back());
568 ch = utf8_first_byte(y, 3);
569 if utf8_is_cont_byte(y) {
570 let x = unwrap_or_0(bytes.next_back());
571 ch = utf8_first_byte(x, 4);
572 ch = utf8_acc_cont_byte(ch, y);
574 ch = utf8_acc_cont_byte(ch, z);
576 ch = utf8_acc_cont_byte(ch, w);
581 #[stable(feature = "rust1", since = "1.0.0")]
582 impl<'a> Iterator for Chars<'a> {
586 fn next(&mut self) -> Option<char> {
587 next_code_point(&mut self.iter).map(|ch| {
588 // SAFETY: `str` invariant says `ch` is a valid Unicode Scalar Value.
589 unsafe { char::from_u32_unchecked(ch) }
594 fn count(self) -> usize {
595 // The number of `char`s equals the number of non-continuation bytes.
596 let bytes_len = self.iter.len();
597 let mut cont_bytes = 0;
598 for &byte in self.iter {
599 cont_bytes += utf8_is_cont_byte(byte) as usize; // `true as usize` is 1, `false as usize` is 0
601 bytes_len - cont_bytes
605 fn size_hint(&self) -> (usize, Option<usize>) {
606 let len = self.iter.len();
607 // `(len + 3)` can't overflow, because we know that the `slice::Iter`
608 // belongs to a slice in memory which has a maximum length of
609 // `isize::MAX` (that's well below `usize::MAX`).
610 ((len + 3) / 4, Some(len)) // lower bound: `len / 4` rounded up (all 4-byte chars); upper bound: `len` (all 1-byte chars)
614 fn last(mut self) -> Option<char> {
615 // No need to go through the entire string.
620 #[stable(feature = "chars_debug_impl", since = "1.38.0")]
621 impl fmt::Debug for Chars<'_> {
622 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
623 write!(f, "Chars(")?;
624 f.debug_list().entries(self.clone()).finish()?;
630 #[stable(feature = "rust1", since = "1.0.0")]
631 impl<'a> DoubleEndedIterator for Chars<'a> {
633 fn next_back(&mut self) -> Option<char> {
634 next_code_point_reverse(&mut self.iter).map(|ch| {
635 // SAFETY: `str` invariant says `ch` is a valid Unicode Scalar Value.
636 unsafe { char::from_u32_unchecked(ch) }
641 #[stable(feature = "fused", since = "1.26.0")]
642 impl FusedIterator for Chars<'_> {}
645 /// Views the underlying data as a subslice of the original data.
647 /// This has the same lifetime as the original slice, and so the
648 /// iterator can continue to be used while this exists.
653 /// let mut chars = "abc".chars();
655 /// assert_eq!(chars.as_str(), "abc");
657 /// assert_eq!(chars.as_str(), "bc");
660 /// assert_eq!(chars.as_str(), "");
662 #[stable(feature = "iter_to_slice", since = "1.4.0")]
664 pub fn as_str(&self) -> &'a str {
665 // SAFETY: `Chars` is only made from a str, which guarantees the iter is valid UTF-8.
666 unsafe { from_utf8_unchecked(self.iter.as_slice()) }
670 /// An iterator over the [`char`]s of a string slice, and their positions.
672 /// This struct is created by the [`char_indices`] method on [`str`].
673 /// See its documentation for more.
675 /// [`char_indices`]: str::char_indices
676 #[derive(Clone, Debug)]
677 #[stable(feature = "rust1", since = "1.0.0")]
678 pub struct CharIndices<'a> {
683 #[stable(feature = "rust1", since = "1.0.0")]
684 impl<'a> Iterator for CharIndices<'a> {
685 type Item = (usize, char);
688 fn next(&mut self) -> Option<(usize, char)> {
689 let pre_len = self.iter.iter.len(); // remaining length before decoding the next char
690 match self.iter.next() {
693 let index = self.front_offset; // front offset before it is advanced below
694 let len = self.iter.iter.len(); // remaining length after decoding
695 self.front_offset += pre_len - len; // advance by the amount just consumed
702 fn count(self) -> usize {
707 fn size_hint(&self) -> (usize, Option<usize>) {
708 self.iter.size_hint()
712 fn last(mut self) -> Option<(usize, char)> {
713 // No need to go through the entire string.
718 #[stable(feature = "rust1", since = "1.0.0")]
719 impl<'a> DoubleEndedIterator for CharIndices<'a> {
721 fn next_back(&mut self) -> Option<(usize, char)> {
722 self.iter.next_back().map(|ch| {
723 let index = self.front_offset + self.iter.iter.len(); // front offset + length remaining after taking `ch` from the back
729 #[stable(feature = "fused", since = "1.26.0")]
730 impl FusedIterator for CharIndices<'_> {}
732 impl<'a> CharIndices<'a> {
733 /// Views the underlying data as a subslice of the original data.
735 /// This has the same lifetime as the original slice, and so the
736 /// iterator can continue to be used while this exists.
737 #[stable(feature = "iter_to_slice", since = "1.4.0")]
739 pub fn as_str(&self) -> &'a str {
744 /// An iterator over the bytes of a string slice.
746 /// This struct is created by the [`bytes`] method on [`str`].
747 /// See its documentation for more.
749 /// [`bytes`]: str::bytes
750 #[stable(feature = "rust1", since = "1.0.0")]
751 #[derive(Clone, Debug)]
752 pub struct Bytes<'a>(Copied<slice::Iter<'a, u8>>);
754 #[stable(feature = "rust1", since = "1.0.0")]
755 impl Iterator for Bytes<'_> {
759 fn next(&mut self) -> Option<u8> {
764 fn size_hint(&self) -> (usize, Option<usize>) {
769 fn count(self) -> usize {
774 fn last(self) -> Option<Self::Item> {
779 fn nth(&mut self, n: usize) -> Option<Self::Item> {
784 fn all<F>(&mut self, f: F) -> bool
786 F: FnMut(Self::Item) -> bool,
792 fn any<F>(&mut self, f: F) -> bool
794 F: FnMut(Self::Item) -> bool,
800 fn find<P>(&mut self, predicate: P) -> Option<Self::Item>
802 P: FnMut(&Self::Item) -> bool,
804 self.0.find(predicate)
808 fn position<P>(&mut self, predicate: P) -> Option<usize>
810 P: FnMut(Self::Item) -> bool,
812 self.0.position(predicate)
816 fn rposition<P>(&mut self, predicate: P) -> Option<usize>
818 P: FnMut(Self::Item) -> bool,
820 self.0.rposition(predicate)
824 #[stable(feature = "rust1", since = "1.0.0")]
825 impl DoubleEndedIterator for Bytes<'_> {
827 fn next_back(&mut self) -> Option<u8> {
832 fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
837 fn rfind<P>(&mut self, predicate: P) -> Option<Self::Item>
839 P: FnMut(&Self::Item) -> bool,
841 self.0.rfind(predicate)
845 #[stable(feature = "rust1", since = "1.0.0")]
846 impl ExactSizeIterator for Bytes<'_> {
848 fn len(&self) -> usize {
853 fn is_empty(&self) -> bool {
858 #[stable(feature = "fused", since = "1.26.0")]
859 impl FusedIterator for Bytes<'_> {}
861 #[unstable(feature = "trusted_len", issue = "37572")]
862 unsafe impl TrustedLen for Bytes<'_> {}
865 unsafe impl TrustedRandomAccess for Bytes<'_> {
866 unsafe fn get_unchecked(&mut self, i: usize) -> u8 {
867 // SAFETY: the caller must uphold the safety contract
868 // for `TrustedRandomAccess::get_unchecked`.
869 unsafe { self.0.get_unchecked(i) }
871 fn may_have_side_effect() -> bool {
876 /// This macro generates a `Clone` impl for string pattern API
877 /// wrapper types of the form `X<'a, P>`.
878 macro_rules! derive_pattern_clone {
879 (clone $t:ident with |$s:ident| $e:expr) => {
880 impl<'a, P> Clone for $t<'a, P>
882 P: Pattern<'a, Searcher: Clone>,
884 fn clone(&self) -> Self {
892 /// This macro generates two public iterator structs
893 /// wrapping a private internal one that makes use of the `Pattern` API.
895 /// For all patterns `P: Pattern<'a>` the following items will be
896 /// generated (generics omitted):
898 /// struct $forward_iterator($internal_iterator);
899 /// struct $reverse_iterator($internal_iterator);
901 /// impl Iterator for $forward_iterator
902 /// { /* internal ends up calling Searcher::next_match() */ }
904 /// impl DoubleEndedIterator for $forward_iterator
905 /// where P::Searcher: DoubleEndedSearcher
906 /// { /* internal ends up calling Searcher::next_match_back() */ }
908 /// impl Iterator for $reverse_iterator
909 /// where P::Searcher: ReverseSearcher
910 /// { /* internal ends up calling Searcher::next_match_back() */ }
912 /// impl DoubleEndedIterator for $reverse_iterator
913 /// where P::Searcher: DoubleEndedSearcher
914 /// { /* internal ends up calling Searcher::next_match() */ }
916 /// The internal one is defined outside the macro, and has almost the same
917 /// semantics as a `DoubleEndedIterator` by delegating to `pattern::Searcher` and
918 /// `pattern::ReverseSearcher` for both forward and reverse iteration.
920 /// "Almost", because a `Searcher` and a `ReverseSearcher` for a given
921 /// `Pattern` might not return the same elements, so actually implementing
922 /// `DoubleEndedIterator` for it would be incorrect.
923 /// (See the docs in `str::pattern` for more details)
925 /// However, the internal struct still represents a single-ended iterator from
926 /// either end, and depending on the pattern, is also a valid double-ended iterator,
927 /// so the two wrapper structs implement `Iterator`
928 /// and `DoubleEndedIterator` depending on the concrete pattern type, leading
929 /// to the complex impls seen above.
930 macro_rules! generate_pattern_iterators {
934 $(#[$forward_iterator_attribute:meta])*
935 struct $forward_iterator:ident;
939 $(#[$reverse_iterator_attribute:meta])*
940 struct $reverse_iterator:ident;
942 // Stability of all generated items
944 $(#[$common_stability_attribute:meta])*
946 // Internal almost-iterator that is being delegated to
948 $internal_iterator:ident yielding ($iterty:ty);
950 // Kind of delegation - either single ended or double ended
953 $(#[$forward_iterator_attribute])*
954 $(#[$common_stability_attribute])*
955 pub struct $forward_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);
957 $(#[$common_stability_attribute])*
958 impl<'a, P> fmt::Debug for $forward_iterator<'a, P>
960 P: Pattern<'a, Searcher: fmt::Debug>,
962 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
963 f.debug_tuple(stringify!($forward_iterator))
969 $(#[$common_stability_attribute])*
970 impl<'a, P: Pattern<'a>> Iterator for $forward_iterator<'a, P> {
974 fn next(&mut self) -> Option<$iterty> {
979 $(#[$common_stability_attribute])*
980 impl<'a, P> Clone for $forward_iterator<'a, P>
982 P: Pattern<'a, Searcher: Clone>,
984 fn clone(&self) -> Self {
985 $forward_iterator(self.0.clone())
989 $(#[$reverse_iterator_attribute])*
990 $(#[$common_stability_attribute])*
991 pub struct $reverse_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);
993 $(#[$common_stability_attribute])*
994 impl<'a, P> fmt::Debug for $reverse_iterator<'a, P>
996 P: Pattern<'a, Searcher: fmt::Debug>,
998 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
999 f.debug_tuple(stringify!($reverse_iterator))
1005 $(#[$common_stability_attribute])*
1006 impl<'a, P> Iterator for $reverse_iterator<'a, P>
1008 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
1010 type Item = $iterty;
1013 fn next(&mut self) -> Option<$iterty> {
1018 $(#[$common_stability_attribute])*
1019 impl<'a, P> Clone for $reverse_iterator<'a, P>
1021 P: Pattern<'a, Searcher: Clone>,
1023 fn clone(&self) -> Self {
1024 $reverse_iterator(self.0.clone())
1028 #[stable(feature = "fused", since = "1.26.0")]
1029 impl<'a, P: Pattern<'a>> FusedIterator for $forward_iterator<'a, P> {}
1031 #[stable(feature = "fused", since = "1.26.0")]
1032 impl<'a, P> FusedIterator for $reverse_iterator<'a, P>
1034 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
1037 generate_pattern_iterators!($($t)* with $(#[$common_stability_attribute])*,
1039 $reverse_iterator, $iterty);
1042 double ended; with $(#[$common_stability_attribute:meta])*,
1043 $forward_iterator:ident,
1044 $reverse_iterator:ident, $iterty:ty
1046 $(#[$common_stability_attribute])*
1047 impl<'a, P> DoubleEndedIterator for $forward_iterator<'a, P>
1049 P: Pattern<'a, Searcher: DoubleEndedSearcher<'a>>,
1052 fn next_back(&mut self) -> Option<$iterty> {
1057 $(#[$common_stability_attribute])*
1058 impl<'a, P> DoubleEndedIterator for $reverse_iterator<'a, P>
1060 P: Pattern<'a, Searcher: DoubleEndedSearcher<'a>>,
1063 fn next_back(&mut self) -> Option<$iterty> {
1069 single ended; with $(#[$common_stability_attribute:meta])*,
1070 $forward_iterator:ident,
1071 $reverse_iterator:ident, $iterty:ty
1075 derive_pattern_clone! {
1077 with |s| SplitInternal { matcher: s.matcher.clone(), ..*s }
1080 struct SplitInternal<'a, P: Pattern<'a>> {
1083 matcher: P::Searcher,
1084 allow_trailing_empty: bool,
1088 impl<'a, P> fmt::Debug for SplitInternal<'a, P>
1090 P: Pattern<'a, Searcher: fmt::Debug>,
1092 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1093 f.debug_struct("SplitInternal")
1094 .field("start", &self.start)
1095 .field("end", &self.end)
1096 .field("matcher", &self.matcher)
1097 .field("allow_trailing_empty", &self.allow_trailing_empty)
1098 .field("finished", &self.finished)
1103 impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
1105 fn get_end(&mut self) -> Option<&'a str> {
1106 if !self.finished && (self.allow_trailing_empty || self.end - self.start > 0) {
1107 self.finished = true;
1108 // SAFETY: `self.start` and `self.end` always lie on unicode boundaries.
1110 let string = self.matcher.haystack().get_unchecked(self.start..self.end);
1119 fn next(&mut self) -> Option<&'a str> {
1124 let haystack = self.matcher.haystack();
1125 match self.matcher.next_match() {
1126 // SAFETY: `Searcher` guarantees that `a` and `b` lie on unicode boundaries.
1127 Some((a, b)) => unsafe {
1128 let elt = haystack.get_unchecked(self.start..a);
1132 None => self.get_end(),
1137 fn next_inclusive(&mut self) -> Option<&'a str> {
1142 let haystack = self.matcher.haystack();
1143 match self.matcher.next_match() {
1144 // SAFETY: `Searcher` guarantees that `b` lies on a unicode boundary,
1145 // and `self.start` is either the start of the original string,
1146 // or `b` was assigned to it, so it also lies on a unicode boundary.
1147 Some((_, b)) => unsafe {
1148 let elt = haystack.get_unchecked(self.start..b); // ends at `b`, the second index of the match, not the first
1152 None => self.get_end(),
1157 fn next_back(&mut self) -> Option<&'a str>
1159 P::Searcher: ReverseSearcher<'a>,
1165 if !self.allow_trailing_empty {
1166 self.allow_trailing_empty = true;
1167 match self.next_back() {
1168 Some(elt) if !elt.is_empty() => return Some(elt),
1177 let haystack = self.matcher.haystack();
1178 match self.matcher.next_match_back() {
1179 // SAFETY: `Searcher` guarantees that `a` and `b` lie on unicode boundaries.
1180 Some((a, b)) => unsafe {
1181 let elt = haystack.get_unchecked(b..self.end);
1185 // SAFETY: `self.start` and `self.end` always lie on unicode boundaries.
1187 self.finished = true;
1188 Some(haystack.get_unchecked(self.start..self.end))
1194 fn next_back_inclusive(&mut self) -> Option<&'a str>
1196 P::Searcher: ReverseSearcher<'a>,
1202 if !self.allow_trailing_empty {
1203 self.allow_trailing_empty = true;
1204 match self.next_back_inclusive() {
1205 Some(elt) if !elt.is_empty() => return Some(elt),
1214 let haystack = self.matcher.haystack();
1215 match self.matcher.next_match_back() {
1216 // SAFETY: `Searcher` guarantees that `b` lies on a unicode boundary,
1217 // and `self.end` is either the end of the original string,
1218 // or `b` was assigned to it, so it also lies on a unicode boundary.
1219 Some((_, b)) => unsafe {
1220 let elt = haystack.get_unchecked(b..self.end);
1224 // SAFETY: `self.start` is either the start of the original string,
1225 // or the start of a substring that represents the part of the string that hasn't
1226 // been iterated yet. Either way, it is guaranteed to lie on a unicode boundary.
1227 // `self.end` is either the end of the original string,
1228 // or `b` was assigned to it, so it also lies on a unicode boundary.
1230 self.finished = true;
1231 Some(haystack.get_unchecked(self.start..self.end))
1237 generate_pattern_iterators! {
1239 /// Created with the method [`split`].
1241 /// [`split`]: str::split
1244 /// Created with the method [`rsplit`].
1246 /// [`rsplit`]: str::rsplit
1249 #[stable(feature = "rust1", since = "1.0.0")]
1251 SplitInternal yielding (&'a str);
1252 delegate double ended;
1255 generate_pattern_iterators! {
1257 /// Created with the method [`split_terminator`].
1259 /// [`split_terminator`]: str::split_terminator
1260 struct SplitTerminator;
1262 /// Created with the method [`rsplit_terminator`].
1264 /// [`rsplit_terminator`]: str::rsplit_terminator
1265 struct RSplitTerminator;
1267 #[stable(feature = "rust1", since = "1.0.0")]
1269 SplitInternal yielding (&'a str);
1270 delegate double ended;
1273 derive_pattern_clone! {
1274 clone SplitNInternal
1275 with |s| SplitNInternal { iter: s.iter.clone(), ..*s }
1278 struct SplitNInternal<'a, P: Pattern<'a>> {
1279 iter: SplitInternal<'a, P>,
1280 /// The number of splits remaining
1284 impl<'a, P> fmt::Debug for SplitNInternal<'a, P>
1286 P: Pattern<'a, Searcher: fmt::Debug>,
1288 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1289 f.debug_struct("SplitNInternal")
1290 .field("iter", &self.iter)
1291 .field("count", &self.count)
1296 impl<'a, P: Pattern<'a>> SplitNInternal<'a, P> {
1298 fn next(&mut self) -> Option<&'a str> {
1313 fn next_back(&mut self) -> Option<&'a str>
1315 P::Searcher: ReverseSearcher<'a>,
1325 self.iter.next_back()
1331 generate_pattern_iterators! {
1333 /// Created with the method [`splitn`].
1335 /// [`splitn`]: str::splitn
1338 /// Created with the method [`rsplitn`].
1340 /// [`rsplitn`]: str::rsplitn
1343 #[stable(feature = "rust1", since = "1.0.0")]
1345 SplitNInternal yielding (&'a str);
1346 delegate single ended;
1349 derive_pattern_clone! {
1350 clone MatchIndicesInternal
1351 with |s| MatchIndicesInternal(s.0.clone())
1354 struct MatchIndicesInternal<'a, P: Pattern<'a>>(P::Searcher);
1356 impl<'a, P> fmt::Debug for MatchIndicesInternal<'a, P>
1358 P: Pattern<'a, Searcher: fmt::Debug>,
1360 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1361 f.debug_tuple("MatchIndicesInternal").field(&self.0).finish()
1365 impl<'a, P: Pattern<'a>> MatchIndicesInternal<'a, P> {
1367 fn next(&mut self) -> Option<(usize, &'a str)> {
1370 // SAFETY: `Searcher` guarantees that `start` and `end` lie on unicode boundaries.
1371 .map(|(start, end)| unsafe { (start, self.0.haystack().get_unchecked(start..end)) })
1375 fn next_back(&mut self) -> Option<(usize, &'a str)>
1377 P::Searcher: ReverseSearcher<'a>,
1381 // SAFETY: `Searcher` guarantees that `start` and `end` lie on unicode boundaries.
1382 .map(|(start, end)| unsafe { (start, self.0.haystack().get_unchecked(start..end)) })
1386 generate_pattern_iterators! {
1388 /// Created with the method [`match_indices`].
1390 /// [`match_indices`]: str::match_indices
1391 struct MatchIndices;
1393 /// Created with the method [`rmatch_indices`].
1395 /// [`rmatch_indices`]: str::rmatch_indices
1396 struct RMatchIndices;
1398 #[stable(feature = "str_match_indices", since = "1.5.0")]
1400 MatchIndicesInternal yielding ((usize, &'a str));
1401 delegate double ended;
1404 derive_pattern_clone! {
1405 clone MatchesInternal
1406 with |s| MatchesInternal(s.0.clone())
1409 struct MatchesInternal<'a, P: Pattern<'a>>(P::Searcher);
1411 impl<'a, P> fmt::Debug for MatchesInternal<'a, P>
1413 P: Pattern<'a, Searcher: fmt::Debug>,
1415 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1416 f.debug_tuple("MatchesInternal").field(&self.0).finish()
1420 impl<'a, P: Pattern<'a>> MatchesInternal<'a, P> {
1422 fn next(&mut self) -> Option<&'a str> {
1423 // SAFETY: `Searcher` guarantees that `start` and `end` lie on unicode boundaries.
1424 self.0.next_match().map(|(a, b)| unsafe {
1425 // Indices are known to be on utf8 boundaries
1426 self.0.haystack().get_unchecked(a..b)
1431 fn next_back(&mut self) -> Option<&'a str>
1433 P::Searcher: ReverseSearcher<'a>,
1435 // SAFETY: `Searcher` guarantees that `start` and `end` lie on unicode boundaries.
1436 self.0.next_match_back().map(|(a, b)| unsafe {
1437 // Indices are known to be on utf8 boundaries
1438 self.0.haystack().get_unchecked(a..b)
1443 generate_pattern_iterators! {
1445 /// Created with the method [`matches`].
1447 /// [`matches`]: str::matches
1450 /// Created with the method [`rmatches`].
1452 /// [`rmatches`]: str::rmatches
1455 #[stable(feature = "str_matches", since = "1.2.0")]
1457 MatchesInternal yielding (&'a str);
1458 delegate double ended;
1461 /// An iterator over the lines of a string, as string slices.
1463 /// This struct is created with the [`lines`] method on [`str`].
1464 /// See its documentation for more.
1466 /// [`lines`]: str::lines
1467 #[stable(feature = "rust1", since = "1.0.0")]
1468 #[derive(Clone, Debug)]
1469 pub struct Lines<'a>(Map<SplitTerminator<'a, char>, LinesAnyMap>);
1471 #[stable(feature = "rust1", since = "1.0.0")]
1472 impl<'a> Iterator for Lines<'a> {
1473 type Item = &'a str;
1476 fn next(&mut self) -> Option<&'a str> {
1481 fn size_hint(&self) -> (usize, Option<usize>) {
1486 fn last(mut self) -> Option<&'a str> {
1491 #[stable(feature = "rust1", since = "1.0.0")]
1492 impl<'a> DoubleEndedIterator for Lines<'a> {
1494 fn next_back(&mut self) -> Option<&'a str> {
1499 #[stable(feature = "fused", since = "1.26.0")]
1500 impl FusedIterator for Lines<'_> {}
1502 /// Created with the method [`lines_any`].
1504 /// [`lines_any`]: str::lines_any
1505 #[stable(feature = "rust1", since = "1.0.0")]
1506 #[rustc_deprecated(since = "1.4.0", reason = "use lines()/Lines instead now")]
1507 #[derive(Clone, Debug)]
1508 #[allow(deprecated)]
1509 pub struct LinesAny<'a>(Lines<'a>);
1512 /// A nameable, cloneable fn type
1514 struct LinesAnyMap impl<'a> Fn = |line: &'a str| -> &'a str {
1516 if l > 0 && line.as_bytes()[l - 1] == b'\r' { &line[0 .. l - 1] }
1521 #[stable(feature = "rust1", since = "1.0.0")]
1522 #[allow(deprecated)]
1523 impl<'a> Iterator for LinesAny<'a> {
1524 type Item = &'a str;
1527 fn next(&mut self) -> Option<&'a str> {
1532 fn size_hint(&self) -> (usize, Option<usize>) {
1537 #[stable(feature = "rust1", since = "1.0.0")]
1538 #[allow(deprecated)]
1539 impl<'a> DoubleEndedIterator for LinesAny<'a> {
1541 fn next_back(&mut self) -> Option<&'a str> {
1546 #[stable(feature = "fused", since = "1.26.0")]
1547 #[allow(deprecated)]
1548 impl FusedIterator for LinesAny<'_> {}
1551 Section: UTF-8 validation
1554 // High bit of every byte set; the `u64` literal is truncated by `as` to fit `usize`.
1555 const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;
1557 /// Returns `true` if any byte in the word `x` is non-ASCII (>= 128), i.e. has its high bit set.
1559 fn contains_nonascii(x: usize) -> bool {
1560 (x & NONASCII_MASK) != 0 // any surviving bit is the high bit of some byte of `x`
1563 /// Walks through `v` checking that it's a valid UTF-8 sequence,
1564 /// returning `Ok(())` in that case, or, if it is invalid, `Err(err)`.
1566 fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
1570 let usize_bytes = mem::size_of::<usize>();
1571 let ascii_block_size = 2 * usize_bytes;
1572 let blocks_end = if len >= ascii_block_size { len - ascii_block_size + 1 } else { 0 };
1573 let align = v.as_ptr().align_offset(usize_bytes);
1576 let old_offset = index;
1578 ($error_len: expr) => {
1579 return Err(Utf8Error { valid_up_to: old_offset, error_len: $error_len });
1586 // we needed data, but there was none: error!
1594 let first = v[index];
1596 let w = UTF8_CHAR_WIDTH[first as usize];
1597 // 2-byte encoding is for codepoints \u{0080} to \u{07ff}
1598 // first C2 80 last DF BF
1599 // 3-byte encoding is for codepoints \u{0800} to \u{ffff}
1600 // first E0 A0 80 last EF BF BF
1601 // excluding surrogates codepoints \u{d800} to \u{dfff}
1602 // ED A0 80 to ED BF BF
1603 // 4-byte encoding is for codepoints \u{10000} to \u{10ffff}
1604 // first F0 90 80 80 last F4 8F BF BF
1606 // Use the UTF-8 syntax from the RFC
1608 // https://tools.ietf.org/html/rfc3629
1610 // UTF8-2 = %xC2-DF UTF8-tail
1611 // UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
1612 // %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
1613 // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
1614 // %xF4 %x80-8F 2( UTF8-tail )
1617 if next!() & !CONT_MASK != TAG_CONT_U8 {
1622 match (first, next!()) {
1624 | (0xE1..=0xEC, 0x80..=0xBF)
1625 | (0xED, 0x80..=0x9F)
1626 | (0xEE..=0xEF, 0x80..=0xBF) => {}
1629 if next!() & !CONT_MASK != TAG_CONT_U8 {
1634 match (first, next!()) {
1635 (0xF0, 0x90..=0xBF) | (0xF1..=0xF3, 0x80..=0xBF) | (0xF4, 0x80..=0x8F) => {}
1638 if next!() & !CONT_MASK != TAG_CONT_U8 {
1641 if next!() & !CONT_MASK != TAG_CONT_U8 {
1649 // Ascii case, try to skip forward quickly.
1650 // When the pointer is aligned, read 2 words of data per iteration
1651 // until we find a word containing a non-ascii byte.
1652 if align != usize::MAX && align.wrapping_sub(index) % usize_bytes == 0 {
1653 let ptr = v.as_ptr();
1654 while index < blocks_end {
1655 // SAFETY: since `align - index` and `ascii_block_size` are
1656 // multiples of `usize_bytes`, `block = ptr.add(index)` is
1657 // always aligned with a `usize` so it's safe to dereference
1658 // both `block` and `block.offset(1)`.
1660 let block = ptr.add(index) as *const usize;
1661 // break if there is a nonascii byte
1662 let zu = contains_nonascii(*block);
1663 let zv = contains_nonascii(*block.offset(1));
1668 index += ascii_block_size;
1670 // step from the point where the wordwise loop stopped
1671 while index < len && v[index] < 128 {
1683 // https://tools.ietf.org/html/rfc3629
1684 static UTF8_CHAR_WIDTH: [u8; 256] = [
1685 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1687 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1689 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1691 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1693 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1695 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1697 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1699 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xEF
1700 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xFF
1703 /// Given a first byte, determines how many bytes are in this UTF-8 character.
1704 #[unstable(feature = "str_internals", issue = "none")]
1706 pub fn utf8_char_width(b: u8) -> usize {
1707 UTF8_CHAR_WIDTH[b as usize] as usize
1710 /// Mask of the value bits (the low six bits) of a continuation byte.
1711 const CONT_MASK: u8 = 0b0011_1111;
1712 /// Value of the tag bits (tag mask is `!CONT_MASK`) of a continuation byte: `0b10xx_xxxx`.
1713 const TAG_CONT_U8: u8 = 0b1000_0000;
1716 Section: Trait implementations
1720 use crate::cmp::Ordering;
1723 use crate::slice::SliceIndex;
1725 /// Implements ordering of strings.
1727 /// Strings are ordered lexicographically by their byte values. This orders Unicode code
1728 /// points based on their positions in the code charts. This is not necessarily the same as
1729 /// "alphabetical" order, which varies by language and locale. Sorting strings according to
1730 /// culturally-accepted standards requires locale-specific data that is outside the scope of
1732 #[stable(feature = "rust1", since = "1.0.0")]
1735 fn cmp(&self, other: &str) -> Ordering {
1736 self.as_bytes().cmp(other.as_bytes())
1740 #[stable(feature = "rust1", since = "1.0.0")]
1741 impl PartialEq for str {
1743 fn eq(&self, other: &str) -> bool {
1744 self.as_bytes() == other.as_bytes()
1747 fn ne(&self, other: &str) -> bool {
1752 #[stable(feature = "rust1", since = "1.0.0")]
1755 /// Implements comparison operations on strings.
1757 /// Strings are compared lexicographically by their byte values. This compares Unicode code
1758 /// points based on their positions in the code charts. This is not necessarily the same as
1759 /// "alphabetical" order, which varies by language and locale. Comparing strings according to
1760 /// culturally-accepted standards requires locale-specific data that is outside the scope of
1762 #[stable(feature = "rust1", since = "1.0.0")]
1763 impl PartialOrd for str {
1765 fn partial_cmp(&self, other: &str) -> Option<Ordering> {
1766 Some(self.cmp(other))
1770 #[stable(feature = "rust1", since = "1.0.0")]
1771 impl<I> ops::Index<I> for str
1775 type Output = I::Output;
1778 fn index(&self, index: I) -> &I::Output {
1783 #[stable(feature = "rust1", since = "1.0.0")]
1784 impl<I> ops::IndexMut<I> for str
1789 fn index_mut(&mut self, index: I) -> &mut I::Output {
1790 index.index_mut(self)
1797 fn str_index_overflow_fail() -> ! { // diverges; used when an inclusive range ends at `usize::MAX`
1798 panic!("attempted to index str up to maximum usize"); // `end + 1` would overflow there
1801 /// Implements substring slicing with syntax `&self[..]` or `&mut self[..]`.
1803 /// Returns a slice of the whole string, i.e., returns `&self` or `&mut
1804 /// self`. Equivalent to `&self[0 .. len]` or `&mut self[0 .. len]`. Unlike
1805 /// other indexing operations, this can never panic.
1807 /// This operation is `O(1)`.
1809 /// Prior to 1.20.0, these indexing operations were still supported by
1810 /// direct implementation of `Index` and `IndexMut`.
1812 /// Equivalent to `&self[0 .. len]` or `&mut self[0 .. len]`.
1813 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
1814 unsafe impl SliceIndex<str> for ops::RangeFull {
1817 fn get(self, slice: &str) -> Option<&Self::Output> {
1821 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
1825 unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output {
1829 unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut Self::Output {
1833 fn index(self, slice: &str) -> &Self::Output {
1837 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
1842 /// Implements substring slicing with syntax `&self[begin .. end]` or `&mut
1843 /// self[begin .. end]`.
1845 /// Returns a slice of the given string from the byte range
1846 /// [`begin`, `end`).
1848 /// This operation is `O(1)`.
1850 /// Prior to 1.20.0, these indexing operations were still supported by
1851 /// direct implementation of `Index` and `IndexMut`.
1855 /// Panics if `begin` or `end` does not point to the starting byte offset of
1856 /// a character (as defined by `is_char_boundary`), if `begin > end`, or if
1862 /// let s = "Löwe 老虎 Léopard";
1863 /// assert_eq!(&s[0 .. 1], "L");
1865 /// assert_eq!(&s[1 .. 9], "öwe 老");
1867 /// // these will panic:
1868 /// // byte 2 lies within `ö`:
1871 /// // byte 8 lies within `老`
1874 /// // byte 100 is outside the string
1875 /// // &s[3 .. 100];
1877 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
1878 unsafe impl SliceIndex<str> for ops::Range<usize> {
1881 fn get(self, slice: &str) -> Option<&Self::Output> {
1882 if self.start <= self.end
1883 && slice.is_char_boundary(self.start)
1884 && slice.is_char_boundary(self.end)
1886 // SAFETY: just checked that `start` and `end` are on a char boundary,
1887 // and we are passing in a safe reference, so the return value will also be one.
1888 // We also checked char boundaries, so this is valid UTF-8.
1889 Some(unsafe { &*self.get_unchecked(slice) })
1895 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
1896 if self.start <= self.end
1897 && slice.is_char_boundary(self.start)
1898 && slice.is_char_boundary(self.end)
1900 // SAFETY: just checked that `start` and `end` are on a char boundary.
1901 // We know the pointer is unique because we got it from `slice`.
1902 Some(unsafe { &mut *self.get_unchecked_mut(slice) })
1908 unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output {
1909 let slice = slice as *const [u8]; // view the string as raw bytes for pointer arithmetic
1910 // SAFETY: the caller guarantees that `self` is in bounds of `slice`
1911 // which satisfies all the conditions for `add`.
1912 let ptr = unsafe { slice.as_ptr().add(self.start) };
1913 let len = self.end - self.start; // cannot underflow if the caller upheld `start <= end`
1914 ptr::slice_from_raw_parts(ptr, len) as *const str // rebuild a `str` pointer over the byte range
1917 unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut Self::Output {
1918 let slice = slice as *mut [u8];
1919 // SAFETY: see comments for `get_unchecked`.
1920 let ptr = unsafe { slice.as_mut_ptr().add(self.start) };
1921 let len = self.end - self.start;
1922 ptr::slice_from_raw_parts_mut(ptr, len) as *mut str
1925 fn index(self, slice: &str) -> &Self::Output {
1926 let (start, end) = (self.start, self.end);
1927 match self.get(slice) {
1929 None => super::slice_error_fail(slice, start, end),
1933 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
1934 // is_char_boundary checks that the index is in [0, .len()]
1935 // cannot reuse `get` as above, because of NLL trouble
1936 if self.start <= self.end
1937 && slice.is_char_boundary(self.start)
1938 && slice.is_char_boundary(self.end)
1940 // SAFETY: just checked that `start` and `end` are on a char boundary,
1941 // and we are passing in a safe reference, so the return value will also be one.
1942 unsafe { &mut *self.get_unchecked_mut(slice) }
1944 super::slice_error_fail(slice, self.start, self.end)
1949 /// Implements substring slicing with syntax `&self[.. end]` or `&mut
1952 /// Returns a slice of the given string from the byte range [`0`, `end`).
1953 /// Equivalent to `&self[0 .. end]` or `&mut self[0 .. end]`.
1955 /// This operation is `O(1)`.
1957 /// Prior to 1.20.0, these indexing operations were still supported by
1958 /// direct implementation of `Index` and `IndexMut`.
1962 /// Panics if `end` does not point to the starting byte offset of a
1963 /// character (as defined by `is_char_boundary`), or if `end > len`.
1964 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
1965 unsafe impl SliceIndex<str> for ops::RangeTo<usize> {
1968 fn get(self, slice: &str) -> Option<&Self::Output> {
1969 if slice.is_char_boundary(self.end) {
1970 // SAFETY: just checked that `end` is on a char boundary,
1971 // and we are passing in a safe reference, so the return value will also be one.
1972 Some(unsafe { &*self.get_unchecked(slice) })
1978 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
1979 if slice.is_char_boundary(self.end) {
1980 // SAFETY: just checked that `end` is on a char boundary,
1981 // and we are passing in a safe reference, so the return value will also be one.
1982 Some(unsafe { &mut *self.get_unchecked_mut(slice) })
1988 unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output {
1989 let slice = slice as *const [u8];
1990 let ptr = slice.as_ptr();
1991 ptr::slice_from_raw_parts(ptr, self.end) as *const str
1994 unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut Self::Output {
1995 let slice = slice as *mut [u8];
1996 let ptr = slice.as_mut_ptr();
1997 ptr::slice_from_raw_parts_mut(ptr, self.end) as *mut str
2000 fn index(self, slice: &str) -> &Self::Output {
2002 match self.get(slice) {
2004 None => super::slice_error_fail(slice, 0, end),
2008 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
2009 if slice.is_char_boundary(self.end) {
2010 // SAFETY: just checked that `end` is on a char boundary,
2011 // and we are passing in a safe reference, so the return value will also be one.
2012 unsafe { &mut *self.get_unchecked_mut(slice) }
2014 super::slice_error_fail(slice, 0, self.end)
2019 /// Implements substring slicing with syntax `&self[begin ..]` or `&mut
2020 /// self[begin ..]`.
2022 /// Returns a slice of the given string from the byte range [`begin`,
2023 /// `len`). Equivalent to `&self[begin .. len]` or `&mut self[begin ..
2026 /// This operation is `O(1)`.
2028 /// Prior to 1.20.0, these indexing operations were still supported by
2029 /// direct implementation of `Index` and `IndexMut`.
2033 /// Panics if `begin` does not point to the starting byte offset of
2034 /// a character (as defined by `is_char_boundary`), or if `begin > len`.
2035 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
2036 unsafe impl SliceIndex<str> for ops::RangeFrom<usize> {
2039 fn get(self, slice: &str) -> Option<&Self::Output> {
2040 if slice.is_char_boundary(self.start) {
2041 // SAFETY: just checked that `start` is on a char boundary,
2042 // and we are passing in a safe reference, so the return value will also be one.
2043 Some(unsafe { &*self.get_unchecked(slice) })
2049 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
2050 if slice.is_char_boundary(self.start) {
2051 // SAFETY: just checked that `start` is on a char boundary,
2052 // and we are passing in a safe reference, so the return value will also be one.
2053 Some(unsafe { &mut *self.get_unchecked_mut(slice) })
2059 unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output {
2060 let slice = slice as *const [u8];
2061 // SAFETY: the caller guarantees that `self` is in bounds of `slice`
2062 // which satisfies all the conditions for `add`.
2063 let ptr = unsafe { slice.as_ptr().add(self.start) };
2064 let len = slice.len() - self.start;
2065 ptr::slice_from_raw_parts(ptr, len) as *const str
2068 unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut Self::Output {
2069 let slice = slice as *mut [u8];
2070 // SAFETY: identical to `get_unchecked`.
2071 let ptr = unsafe { slice.as_mut_ptr().add(self.start) };
2072 let len = slice.len() - self.start;
2073 ptr::slice_from_raw_parts_mut(ptr, len) as *mut str
2076 fn index(self, slice: &str) -> &Self::Output {
2077 let (start, end) = (self.start, slice.len());
2078 match self.get(slice) {
2080 None => super::slice_error_fail(slice, start, end),
2084 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
2085 if slice.is_char_boundary(self.start) {
2086 // SAFETY: just checked that `start` is on a char boundary,
2087 // and we are passing in a safe reference, so the return value will also be one.
2088 unsafe { &mut *self.get_unchecked_mut(slice) }
2090 super::slice_error_fail(slice, self.start, slice.len())
2095 /// Implements substring slicing with syntax `&self[begin ..= end]` or `&mut
2096 /// self[begin ..= end]`.
2098 /// Returns a slice of the given string from the byte range
2099 /// [`begin`, `end`]. Equivalent to `&self[begin .. end + 1]` or `&mut
2100 /// self[begin .. end + 1]`, except if `end` has the maximum value for
2103 /// This operation is `O(1)`.
2107 /// Panics if `begin` does not point to the starting byte offset of
2108 /// a character (as defined by `is_char_boundary`), if `end` does not point
2109 /// to the ending byte offset of a character (`end + 1` is either a starting
2110 /// byte offset or equal to `len`), if `begin > end`, or if `end >= len`.
2111 #[stable(feature = "inclusive_range", since = "1.26.0")]
2112 unsafe impl SliceIndex<str> for ops::RangeInclusive<usize> {
2115 fn get(self, slice: &str) -> Option<&Self::Output> {
2116 if *self.end() == usize::MAX {
2119 (*self.start()..self.end() + 1).get(slice)
2123 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
2124 if *self.end() == usize::MAX {
2127 (*self.start()..self.end() + 1).get_mut(slice)
2131 unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output {
2132 // SAFETY: the caller must uphold the safety contract for `get_unchecked`.
2133 unsafe { (*self.start()..self.end() + 1).get_unchecked(slice) }
2136 unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut Self::Output {
2137 // SAFETY: the caller must uphold the safety contract for `get_unchecked_mut`.
2138 unsafe { (*self.start()..self.end() + 1).get_unchecked_mut(slice) }
2141 fn index(self, slice: &str) -> &Self::Output {
2142 if *self.end() == usize::MAX {
2143 str_index_overflow_fail();
2145 (*self.start()..self.end() + 1).index(slice)
2148 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
2149 if *self.end() == usize::MAX {
2150 str_index_overflow_fail();
2152 (*self.start()..self.end() + 1).index_mut(slice)
2156 /// Implements substring slicing with syntax `&self[..= end]` or `&mut
2159 /// Returns a slice of the given string from the byte range [0, `end`].
2160 /// Equivalent to `&self[0 .. end + 1]`, except if `end` has the maximum
2161 /// value for `usize`.
2163 /// This operation is `O(1)`.
2167 /// Panics if `end` does not point to the ending byte offset of a character
2168 /// (`end + 1` is either a starting byte offset as defined by
2169 /// `is_char_boundary`, or equal to `len`), or if `end >= len`.
2170 #[stable(feature = "inclusive_range", since = "1.26.0")]
2171 unsafe impl SliceIndex<str> for ops::RangeToInclusive<usize> {
2174 fn get(self, slice: &str) -> Option<&Self::Output> {
2175 if self.end == usize::MAX { None } else { (..self.end + 1).get(slice) }
2178 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
2179 if self.end == usize::MAX { None } else { (..self.end + 1).get_mut(slice) }
2182 unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output {
2183 // SAFETY: the caller must uphold the safety contract for `get_unchecked`.
2184 unsafe { (..self.end + 1).get_unchecked(slice) }
2187 unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut Self::Output {
2188 // SAFETY: the caller must uphold the safety contract for `get_unchecked_mut`.
2189 unsafe { (..self.end + 1).get_unchecked_mut(slice) }
2192 fn index(self, slice: &str) -> &Self::Output {
2193 if self.end == usize::MAX {
2194 str_index_overflow_fail();
2196 (..self.end + 1).index(slice)
2199 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
2200 if self.end == usize::MAX {
2201 str_index_overflow_fail();
2203 (..self.end + 1).index_mut(slice)
2208 // Truncates `s` to a length of at most `max` bytes, ending on a char boundary.
2209 // Returns `true` if it was truncated, together with the new str.
2210 fn truncate_to_char_boundary(s: &str, mut max: usize) -> (bool, &str) {
2214 while !s.is_char_boundary(max) {
2224 fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
2225 const MAX_DISPLAY_LENGTH: usize = 256;
2226 let (truncated, s_trunc) = truncate_to_char_boundary(s, MAX_DISPLAY_LENGTH);
2227 let ellipsis = if truncated { "[...]" } else { "" };
2230 if begin > s.len() || end > s.len() {
2231 let oob_index = if begin > s.len() { begin } else { end };
2232 panic!("byte index {} is out of bounds of `{}`{}", oob_index, s_trunc, ellipsis);
2238 "begin <= end ({} <= {}) when slicing `{}`{}",
2245 // 3. character boundary
2246 let index = if !s.is_char_boundary(begin) { begin } else { end };
2247 // find the character
2248 let mut char_start = index;
2249 while !s.is_char_boundary(char_start) {
2252 // `char_start` must be less than len and a char boundary
2253 let ch = s[char_start..].chars().next().unwrap();
2254 let char_range = char_start..char_start + ch.len_utf8();
2256 "byte index {} is not a char boundary; it is inside {:?} (bytes {:?}) of `{}`{}",
2257 index, ch, char_range, s_trunc, ellipsis
2264 /// Returns the length of `self`.
2266 /// This length is in bytes, not [`char`]s or graphemes. In other words,
2267 /// it may not be what a human considers the length of the string.
2274 /// let len = "foo".len();
2275 /// assert_eq!(3, len);
2277 /// assert_eq!("ƒoo".len(), 4); // fancy f!
2278 /// assert_eq!("ƒoo".chars().count(), 3);
2280 #[stable(feature = "rust1", since = "1.0.0")]
2281 #[rustc_const_stable(feature = "const_str_len", since = "1.32.0")]
2283 pub const fn len(&self) -> usize {
2284 self.as_bytes().len()
2287 /// Returns `true` if `self` has a length of zero bytes.
2295 /// assert!(s.is_empty());
2297 /// let s = "not empty";
2298 /// assert!(!s.is_empty());
2301 #[stable(feature = "rust1", since = "1.0.0")]
2302 #[rustc_const_stable(feature = "const_str_is_empty", since = "1.32.0")]
2303 pub const fn is_empty(&self) -> bool {
2307 /// Checks that `index`-th byte is the first byte in a UTF-8 code point
2308 /// sequence or the end of the string.
2310 /// The start and end of the string (when `index == self.len()`) are
2311 /// considered to be boundaries.
2313 /// Returns `false` if `index` is greater than `self.len()`.
2318 /// let s = "Löwe 老虎 Léopard";
2319 /// assert!(s.is_char_boundary(0));
2321 /// assert!(s.is_char_boundary(6));
2322 /// assert!(s.is_char_boundary(s.len()));
2324 /// // second byte of `ö`
2325 /// assert!(!s.is_char_boundary(2));
2327 /// // third byte of `老`
2328 /// assert!(!s.is_char_boundary(8));
2330 #[stable(feature = "is_char_boundary", since = "1.9.0")]
2332 pub fn is_char_boundary(&self, index: usize) -> bool {
2333 // Offsets 0 and `len` are always boundaries.
2334 // Test for 0 explicitly so that the compiler can optimize out the check
2335 // easily and skip reading string data for that case.
2336 if index == 0 || index == self.len() {
2339 match self.as_bytes().get(index) {
2341 // Bit magic for `b < 128 || b >= 192`: only 0x80..=0xBF (continuation bytes) are < -0x40 as `i8`.
2342 Some(&b) => (b as i8) >= -0x40,
2346 /// Converts a string slice to a byte slice. To convert the byte slice back
2347 /// into a string slice, use the [`from_utf8`] function.
2354 /// let bytes = "bors".as_bytes();
2355 /// assert_eq!(b"bors", bytes);
2357 #[stable(feature = "rust1", since = "1.0.0")]
2358 #[rustc_const_stable(feature = "str_as_bytes", since = "1.32.0")]
2360 #[allow(unused_attributes)]
2361 #[allow_internal_unstable(const_fn_transmute)]
2362 pub const fn as_bytes(&self) -> &[u8] {
2363 // SAFETY: const sound because we transmute two types with the same layout (`&str` and `&[u8]`)
2364 unsafe { mem::transmute(self) }
2367 /// Converts a mutable string slice to a mutable byte slice.
2371 /// The caller must ensure that the content of the slice is valid UTF-8
2372 /// before the borrow ends and the underlying `str` is used.
2374 /// Use of a `str` whose contents are not valid UTF-8 is undefined behavior.
2381 /// let mut s = String::from("Hello");
2382 /// let bytes = unsafe { s.as_bytes_mut() };
2384 /// assert_eq!(b"Hello", bytes);
2390 /// let mut s = String::from("🗻∈🌏");
2393 /// let bytes = s.as_bytes_mut();
2395 /// bytes[0] = 0xF0;
2396 /// bytes[1] = 0x9F;
2397 /// bytes[2] = 0x8D;
2398 /// bytes[3] = 0x94;
2401 /// assert_eq!("🍔∈🌏", s);
2403 #[stable(feature = "str_mut_extras", since = "1.20.0")]
2405 pub unsafe fn as_bytes_mut(&mut self) -> &mut [u8] {
2406 // SAFETY: the cast from `*mut str` to `*mut [u8]` is safe since `str`
2407 // has the same layout as `[u8]` (only libstd can make this guarantee).
2408 // The pointer dereference is safe since it comes from a mutable reference which
2409 // is guaranteed to be valid for writes.
2410 unsafe { &mut *(self as *mut str as *mut [u8]) }
2413 /// Converts a string slice to a raw pointer.
2415 /// As string slices are a slice of bytes, the raw pointer points to a
2416 /// [`u8`]. This pointer will be pointing to the first byte of the string
2419 /// The caller must ensure that the returned pointer is never written to.
2420 /// If you need to mutate the contents of the string slice, use [`as_mut_ptr`].
2422 /// [`as_mut_ptr`]: str::as_mut_ptr
2429 /// let s = "Hello";
2430 /// let ptr = s.as_ptr();
2432 #[stable(feature = "rust1", since = "1.0.0")]
2433 #[rustc_const_stable(feature = "rustc_str_as_ptr", since = "1.32.0")]
2435 pub const fn as_ptr(&self) -> *const u8 {
2436 self as *const str as *const u8
2439 /// Converts a mutable string slice to a raw pointer.
2441 /// As string slices are a slice of bytes, the raw pointer points to a
2442 /// [`u8`]. This pointer will be pointing to the first byte of the string
2445 /// It is your responsibility to make sure that the string slice only gets
2446 /// modified in a way that it remains valid UTF-8.
2447 #[stable(feature = "str_as_mut_ptr", since = "1.36.0")]
2449 pub fn as_mut_ptr(&mut self) -> *mut u8 {
2450 self as *mut str as *mut u8
2453 /// Returns a subslice of `str`.
2455 /// This is the non-panicking alternative to indexing the `str`. Returns
2456 /// [`None`] whenever equivalent indexing operation would panic.
2461 /// let v = String::from("🗻∈🌏");
2463 /// assert_eq!(Some("🗻"), v.get(0..4));
2465 /// // indices not on UTF-8 sequence boundaries
2466 /// assert!(v.get(1..).is_none());
2467 /// assert!(v.get(..8).is_none());
2469 /// // out of bounds
2470 /// assert!(v.get(..42).is_none());
2472 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
2474 pub fn get<I: SliceIndex<str>>(&self, i: I) -> Option<&I::Output> {
2478 /// Returns a mutable subslice of `str`.
2480 /// This is the non-panicking alternative to indexing the `str`. Returns
2481 /// [`None`] whenever equivalent indexing operation would panic.
2486 /// let mut v = String::from("hello");
2487 /// // correct length
2488 /// assert!(v.get_mut(0..5).is_some());
2489 /// // out of bounds
2490 /// assert!(v.get_mut(..42).is_none());
2491 /// assert_eq!(Some("he"), v.get_mut(0..2).map(|v| &*v));
2493 /// assert_eq!("hello", v);
2495 /// let s = v.get_mut(0..2);
2496 /// let s = s.map(|s| {
2497 /// s.make_ascii_uppercase();
2500 /// assert_eq!(Some("HE"), s);
2502 /// assert_eq!("HEllo", v);
2504 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
2506 pub fn get_mut<I: SliceIndex<str>>(&mut self, i: I) -> Option<&mut I::Output> {
2510 /// Returns an unchecked subslice of `str`.
2512 /// This is the unchecked alternative to indexing the `str`.
2516 /// Callers of this function are responsible that these preconditions are
2519 /// * The starting index must not exceed the ending index;
2520 /// * Indexes must be within bounds of the original slice;
2521 /// * Indexes must lie on UTF-8 sequence boundaries.
2523 /// Failing that, the returned string slice may reference invalid memory or
2524 /// violate the invariants communicated by the `str` type.
2531 /// assert_eq!("🗻", v.get_unchecked(0..4));
2532 /// assert_eq!("∈", v.get_unchecked(4..7));
2533 /// assert_eq!("🌏", v.get_unchecked(7..11));
2536 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
2538 pub unsafe fn get_unchecked<I: SliceIndex<str>>(&self, i: I) -> &I::Output {
2539 // SAFETY: the caller must uphold the safety contract for `get_unchecked`;
2540 // the slice is dereferencable because `self` is a safe reference.
2541 // The returned pointer is safe because impls of `SliceIndex` have to guarantee that it is.
2542 unsafe { &*i.get_unchecked(self) }
2545 /// Returns a mutable, unchecked subslice of `str`.
2547 /// This is the unchecked alternative to indexing the `str`.
2551 /// Callers of this function are responsible that these preconditions are
2554 /// * The starting index must not exceed the ending index;
2555 /// * Indexes must be within bounds of the original slice;
2556 /// * Indexes must lie on UTF-8 sequence boundaries.
2558 /// Failing that, the returned string slice may reference invalid memory or
2559 /// violate the invariants communicated by the `str` type.
2564 /// let mut v = String::from("🗻∈🌏");
2566 /// assert_eq!("🗻", v.get_unchecked_mut(0..4));
2567 /// assert_eq!("∈", v.get_unchecked_mut(4..7));
2568 /// assert_eq!("🌏", v.get_unchecked_mut(7..11));
2571 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
2573 pub unsafe fn get_unchecked_mut<I: SliceIndex<str>>(&mut self, i: I) -> &mut I::Output {
2574 // SAFETY: the caller must uphold the safety contract for `get_unchecked_mut`;
2575 // the slice is dereferenceable because `self` is a safe reference.
2576 // The returned pointer is safe because impls of `SliceIndex` have to guarantee that it is.
// `SliceIndex::get_unchecked_mut` yields a raw pointer; `&mut *` reborrows it as a reference.
2577 unsafe { &mut *i.get_unchecked_mut(self) }
2580 /// Creates a string slice from another string slice, bypassing safety
2583 /// This is generally not recommended, use with caution! For a safe
2584 /// alternative see [`str`] and [`Index`].
2586 /// [`Index`]: crate::ops::Index
2588 /// This new slice goes from `begin` to `end`, including `begin` but
2589 /// excluding `end`.
2591 /// To get a mutable string slice instead, see the
2592 /// [`slice_mut_unchecked`] method.
2594 /// [`slice_mut_unchecked`]: str::slice_mut_unchecked
2598 /// Callers of this function are responsible that three preconditions are
2601 /// * `begin` must not exceed `end`.
2602 /// * `begin` and `end` must be byte positions within the string slice.
2603 /// * `begin` and `end` must lie on UTF-8 sequence boundaries.
2610 /// let s = "Löwe 老虎 Léopard";
2613 /// assert_eq!("Löwe 老虎 Léopard", s.slice_unchecked(0, 21));
2616 /// let s = "Hello, world!";
2619 /// assert_eq!("world", s.slice_unchecked(7, 12));
2622 #[stable(feature = "rust1", since = "1.0.0")]
2623 #[rustc_deprecated(since = "1.29.0", reason = "use `get_unchecked(begin..end)` instead")]
2625 pub unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str {
2626 // SAFETY: the caller must uphold the safety contract for `get_unchecked`;
2627 // the slice is dereferenceable because `self` is a safe reference.
2628 // The returned pointer is safe because impls of `SliceIndex` have to guarantee that it is.
// Same operation as `get_unchecked(begin..end)`, the replacement named in the deprecation note.
2629 unsafe { &*(begin..end).get_unchecked(self) }
2632 /// Creates a string slice from another string slice, bypassing safety
2634 /// This is generally not recommended, use with caution! For a safe
2635 /// alternative see [`str`] and [`IndexMut`].
2637 /// [`IndexMut`]: crate::ops::IndexMut
2639 /// This new slice goes from `begin` to `end`, including `begin` but
2640 /// excluding `end`.
2642 /// To get an immutable string slice instead, see the
2643 /// [`slice_unchecked`] method.
2645 /// [`slice_unchecked`]: str::slice_unchecked
2649 /// Callers of this function are responsible that three preconditions are
2652 /// * `begin` must not exceed `end`.
2653 /// * `begin` and `end` must be byte positions within the string slice.
2654 /// * `begin` and `end` must lie on UTF-8 sequence boundaries.
2655 #[stable(feature = "str_slice_mut", since = "1.5.0")]
2656 #[rustc_deprecated(since = "1.29.0", reason = "use `get_unchecked_mut(begin..end)` instead")]
2658 pub unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str {
2659 // SAFETY: the caller must uphold the safety contract for `get_unchecked_mut`;
2660 // the slice is dereferenceable because `self` is a safe reference.
2661 // The returned pointer is safe because impls of `SliceIndex` have to guarantee that it is.
// Same operation as `get_unchecked_mut(begin..end)`, the replacement named in the deprecation note.
2662 unsafe { &mut *(begin..end).get_unchecked_mut(self) }
2665 /// Divide one string slice into two at an index.
2667 /// The argument, `mid`, should be a byte offset from the start of the
2668 /// string. It must also be on the boundary of a UTF-8 code point.
2670 /// The two slices returned go from the start of the string slice to `mid`,
2671 /// and from `mid` to the end of the string slice.
2673 /// To get mutable string slices instead, see the [`split_at_mut`]
2676 /// [`split_at_mut`]: str::split_at_mut
2680 /// Panics if `mid` is not on a UTF-8 code point boundary, or if it is
2681 /// past the end of the last code point of the string slice.
2688 /// let s = "Per Martin-Löf";
2690 /// let (first, last) = s.split_at(3);
2692 /// assert_eq!("Per", first);
2693 /// assert_eq!(" Martin-Löf", last);
2696 #[stable(feature = "str_split_at", since = "1.4.0")]
2697 pub fn split_at(&self, mid: usize) -> (&str, &str) {
2698 // is_char_boundary checks that the index is in [0, .len()]
2699 if self.is_char_boundary(mid) {
2700 // SAFETY: just checked that `mid` is on a char boundary, which (per the note above)
// also means `mid <= self.len()`, so both `0..mid` and `mid..self.len()` are in bounds.
2701 unsafe { (self.get_unchecked(0..mid), self.get_unchecked(mid..self.len())) }
// `mid` is out of range or inside a code point: this is the documented panic path.
// NOTE(review): `slice_error_fail` is defined elsewhere; presumably it builds the panic message.
2703 slice_error_fail(self, 0, mid)
2707 /// Divide one mutable string slice into two at an index.
2709 /// The argument, `mid`, should be a byte offset from the start of the
2710 /// string. It must also be on the boundary of a UTF-8 code point.
2712 /// The two slices returned go from the start of the string slice to `mid`,
2713 /// and from `mid` to the end of the string slice.
2715 /// To get immutable string slices instead, see the [`split_at`] method.
2717 /// [`split_at`]: str::split_at
2721 /// Panics if `mid` is not on a UTF-8 code point boundary, or if it is
2722 /// past the end of the last code point of the string slice.
2729 /// let mut s = "Per Martin-Löf".to_string();
2731 /// let (first, last) = s.split_at_mut(3);
2732 /// first.make_ascii_uppercase();
2733 /// assert_eq!("PER", first);
2734 /// assert_eq!(" Martin-Löf", last);
2736 /// assert_eq!("PER Martin-Löf", s);
2739 #[stable(feature = "str_split_at", since = "1.4.0")]
2740 pub fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str) {
2741 // is_char_boundary checks that the index is in [0, .len()]
2742 if self.is_char_boundary(mid) {
2743 let len = self.len();
2744 let ptr = self.as_mut_ptr();
2745 // SAFETY: just checked that `mid` is on a char boundary.
// Since `mid <= len`, `len - mid` cannot underflow, and the two byte ranges
// `[ptr, ptr + mid)` and `[ptr + mid, ptr + len)` do not overlap, so handing out
// two `&mut str` at the same time does not alias.
2748 from_utf8_unchecked_mut(slice::from_raw_parts_mut(ptr, mid)),
2749 from_utf8_unchecked_mut(slice::from_raw_parts_mut(ptr.add(mid), len - mid)),
// `mid` is out of range or inside a code point: this is the documented panic path.
2753 slice_error_fail(self, 0, mid)
2757 /// Returns an iterator over the [`char`]s of a string slice.
2759 /// As a string slice consists of valid UTF-8, we can iterate through a
2760 /// string slice by [`char`]. This method returns such an iterator.
2762 /// It's important to remember that [`char`] represents a Unicode Scalar
2763 /// Value, and may not match your idea of what a 'character' is. Iteration
2764 /// over grapheme clusters may be what you actually want. This functionality
2765 /// is not provided by Rust's standard library, check crates.io instead.
2772 /// let word = "goodbye";
2774 /// let count = word.chars().count();
2775 /// assert_eq!(7, count);
2777 /// let mut chars = word.chars();
2779 /// assert_eq!(Some('g'), chars.next());
2780 /// assert_eq!(Some('o'), chars.next());
2781 /// assert_eq!(Some('o'), chars.next());
2782 /// assert_eq!(Some('d'), chars.next());
2783 /// assert_eq!(Some('b'), chars.next());
2784 /// assert_eq!(Some('y'), chars.next());
2785 /// assert_eq!(Some('e'), chars.next());
2787 /// assert_eq!(None, chars.next());
2790 /// Remember, [`char`]s may not match your human intuition about characters:
2795 /// let mut chars = y.chars();
2797 /// assert_eq!(Some('y'), chars.next()); // not 'y̆'
2798 /// assert_eq!(Some('\u{0306}'), chars.next());
2800 /// assert_eq!(None, chars.next());
2802 #[stable(feature = "rust1", since = "1.0.0")]
2804 pub fn chars(&self) -> Chars<'_> {
// `Chars` only stores the byte-slice iterator; nothing is decoded up front.
// NOTE(review): the UTF-8 decoding presumably lives in `Chars`' iterator impl (defined elsewhere).
2805 Chars { iter: self.as_bytes().iter() }
2808 /// Returns an iterator over the [`char`]s of a string slice, and their
2811 /// As a string slice consists of valid UTF-8, we can iterate through a
2812 /// string slice by [`char`]. This method returns an iterator of both
2813 /// these [`char`]s, as well as their byte positions.
2815 /// The iterator yields tuples. The position is first, the [`char`] is
2823 /// let word = "goodbye";
2825 /// let count = word.char_indices().count();
2826 /// assert_eq!(7, count);
2828 /// let mut char_indices = word.char_indices();
2830 /// assert_eq!(Some((0, 'g')), char_indices.next());
2831 /// assert_eq!(Some((1, 'o')), char_indices.next());
2832 /// assert_eq!(Some((2, 'o')), char_indices.next());
2833 /// assert_eq!(Some((3, 'd')), char_indices.next());
2834 /// assert_eq!(Some((4, 'b')), char_indices.next());
2835 /// assert_eq!(Some((5, 'y')), char_indices.next());
2836 /// assert_eq!(Some((6, 'e')), char_indices.next());
2838 /// assert_eq!(None, char_indices.next());
2841 /// Remember, [`char`]s may not match your human intuition about characters:
2844 /// let yes = "y̆es";
2846 /// let mut char_indices = yes.char_indices();
2848 /// assert_eq!(Some((0, 'y')), char_indices.next()); // not (0, 'y̆')
2849 /// assert_eq!(Some((1, '\u{0306}')), char_indices.next());
2851 /// // note the 3 here - the previous character (`'\u{0306}'`) took up two bytes
2852 /// assert_eq!(Some((3, 'e')), char_indices.next());
2853 /// assert_eq!(Some((4, 's')), char_indices.next());
2855 /// assert_eq!(None, char_indices.next());
2857 #[stable(feature = "rust1", since = "1.0.0")]
2859 pub fn char_indices(&self) -> CharIndices<'_> {
// Built on top of `chars()`; `front_offset` starts at 0, the byte position reported
// for the first `char` in the examples above.
2860 CharIndices { front_offset: 0, iter: self.chars() }
2863 /// Returns an iterator over the bytes of a string slice.
2865 /// As a string slice consists of a sequence of bytes, we can iterate
2866 /// through a string slice by byte. This method returns such an iterator.
2873 /// let mut bytes = "bors".bytes();
2875 /// assert_eq!(Some(b'b'), bytes.next());
2876 /// assert_eq!(Some(b'o'), bytes.next());
2877 /// assert_eq!(Some(b'r'), bytes.next());
2878 /// assert_eq!(Some(b's'), bytes.next());
2880 /// assert_eq!(None, bytes.next());
2882 #[stable(feature = "rust1", since = "1.0.0")]
2884 pub fn bytes(&self) -> Bytes<'_> {
// `.copied()` makes the iterator yield `u8` by value (as in the examples) rather than `&u8`.
2885 Bytes(self.as_bytes().iter().copied())
2888 /// Splits a string slice by whitespace.
2890 /// The iterator returned will return string slices that are sub-slices of
2891 /// the original string slice, separated by any amount of whitespace.
2893 /// 'Whitespace' is defined according to the terms of the Unicode Derived
2894 /// Core Property `White_Space`. If you only want to split on ASCII whitespace
2895 /// instead, use [`split_ascii_whitespace`].
2897 /// [`split_ascii_whitespace`]: str::split_ascii_whitespace
2904 /// let mut iter = "A few words".split_whitespace();
2906 /// assert_eq!(Some("A"), iter.next());
2907 /// assert_eq!(Some("few"), iter.next());
2908 /// assert_eq!(Some("words"), iter.next());
2910 /// assert_eq!(None, iter.next());
2913 /// All kinds of whitespace are considered:
2916 /// let mut iter = " Mary had\ta\u{2009}little \n\t lamb".split_whitespace();
2917 /// assert_eq!(Some("Mary"), iter.next());
2918 /// assert_eq!(Some("had"), iter.next());
2919 /// assert_eq!(Some("a"), iter.next());
2920 /// assert_eq!(Some("little"), iter.next());
2921 /// assert_eq!(Some("lamb"), iter.next());
2923 /// assert_eq!(None, iter.next());
2925 #[stable(feature = "split_whitespace", since = "1.1.0")]
2927 pub fn split_whitespace(&self) -> SplitWhitespace<'_> {
// Split on every whitespace `char`, then drop the empty pieces that adjacent, leading or
// trailing whitespace would otherwise produce - hence "any amount of whitespace".
// `IsWhitespace` / `IsNotEmpty` are helper types defined elsewhere in this module.
2928 SplitWhitespace { inner: self.split(IsWhitespace).filter(IsNotEmpty) }
2931 /// Splits a string slice by ASCII whitespace.
2933 /// The iterator returned will return string slices that are sub-slices of
2934 /// the original string slice, separated by any amount of ASCII whitespace.
2936 /// To split by Unicode `White_Space` instead, use [`split_whitespace`].
2938 /// [`split_whitespace`]: str::split_whitespace
2945 /// let mut iter = "A few words".split_ascii_whitespace();
2947 /// assert_eq!(Some("A"), iter.next());
2948 /// assert_eq!(Some("few"), iter.next());
2949 /// assert_eq!(Some("words"), iter.next());
2951 /// assert_eq!(None, iter.next());
2954 /// All kinds of ASCII whitespace are considered:
2957 /// let mut iter = " Mary had\ta little \n\t lamb".split_ascii_whitespace();
2958 /// assert_eq!(Some("Mary"), iter.next());
2959 /// assert_eq!(Some("had"), iter.next());
2960 /// assert_eq!(Some("a"), iter.next());
2961 /// assert_eq!(Some("little"), iter.next());
2962 /// assert_eq!(Some("lamb"), iter.next());
2964 /// assert_eq!(None, iter.next());
2966 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
2968 pub fn split_ascii_whitespace(&self) -> SplitAsciiWhitespace<'_> {
// Operates on the raw bytes: split on ASCII whitespace bytes, drop empty pieces, then view
// each piece as `&str` again. ASCII bytes never occur inside a multi-byte UTF-8 sequence,
// so every piece is still valid UTF-8.
// NOTE(review): `UnsafeBytesToStr` (defined elsewhere) presumably relies on exactly that.
2970 self.as_bytes().split(IsAsciiWhitespace).filter(BytesIsNotEmpty).map(UnsafeBytesToStr);
2971 SplitAsciiWhitespace { inner }
2974 /// An iterator over the lines of a string, as string slices.
2976 /// Lines are ended with either a newline (`\n`) or a carriage return with
2977 /// a line feed (`\r\n`).
2979 /// The final line ending is optional.
2986 /// let text = "foo\r\nbar\n\nbaz\n";
2987 /// let mut lines = text.lines();
2989 /// assert_eq!(Some("foo"), lines.next());
2990 /// assert_eq!(Some("bar"), lines.next());
2991 /// assert_eq!(Some(""), lines.next());
2992 /// assert_eq!(Some("baz"), lines.next());
2994 /// assert_eq!(None, lines.next());
2997 /// The final line ending isn't required:
3000 /// let text = "foo\nbar\n\r\nbaz";
3001 /// let mut lines = text.lines();
3003 /// assert_eq!(Some("foo"), lines.next());
3004 /// assert_eq!(Some("bar"), lines.next());
3005 /// assert_eq!(Some(""), lines.next());
3006 /// assert_eq!(Some("baz"), lines.next());
3008 /// assert_eq!(None, lines.next());
3010 #[stable(feature = "rust1", since = "1.0.0")]
3012 pub fn lines(&self) -> Lines<'_> {
// `split_terminator` (not `split`) so a trailing `\n` does not yield an extra empty line.
// NOTE(review): `LinesAnyMap` (defined elsewhere) presumably strips the `\r` left over
// from a `\r\n` ending, which is what the `"foo\r\n"` example above requires.
3013 Lines(self.split_terminator('\n').map(LinesAnyMap))
3016 /// An iterator over the lines of a string.
///
/// Deprecated: this simply wraps the iterator returned by [`lines`](str::lines);
/// call `lines()` directly instead.
3017 #[stable(feature = "rust1", since = "1.0.0")]
3018 #[rustc_deprecated(since = "1.4.0", reason = "use lines() instead now")]
3020 #[allow(deprecated)]
3021 pub fn lines_any(&self) -> LinesAny<'_> {
3022 LinesAny(self.lines())
3025 /// Returns an iterator of `u16` over the string encoded as UTF-16.
3032 /// let text = "Zażółć gęślą jaźń";
3034 /// let utf8_len = text.len();
3035 /// let utf16_len = text.encode_utf16().count();
3037 /// assert!(utf16_len <= utf8_len);
3039 #[stable(feature = "encode_utf16", since = "1.8.0")]
3040 pub fn encode_utf16(&self) -> EncodeUtf16<'_> {
// The encoder is driven by `chars()`.
// NOTE(review): `extra` presumably buffers a pending second code unit, with 0 meaning
// "nothing pending" - confirm against `EncodeUtf16`'s iterator impl.
3041 EncodeUtf16 { chars: self.chars(), extra: 0 }
3044 /// Returns `true` if the given pattern matches a sub-slice of
3045 /// this string slice.
3047 /// Returns `false` if it does not.
3049 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3050 /// function or closure that determines if a character matches.
3052 /// [pattern]: self::pattern
3059 /// let bananas = "bananas";
3061 /// assert!(bananas.contains("nana"));
3062 /// assert!(!bananas.contains("apples"));
3064 #[stable(feature = "rust1", since = "1.0.0")]
3066 pub fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
// The check is delegated to the pattern via `Pattern::is_contained_in`.
3067 pat.is_contained_in(self)
3070 /// Returns `true` if the given pattern matches a prefix of this
3073 /// Returns `false` if it does not.
3075 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3076 /// function or closure that determines if a character matches.
3078 /// [pattern]: self::pattern
3085 /// let bananas = "bananas";
3087 /// assert!(bananas.starts_with("bana"));
3088 /// assert!(!bananas.starts_with("nana"));
3090 #[stable(feature = "rust1", since = "1.0.0")]
3091 pub fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
// The check is delegated to the pattern via `Pattern::is_prefix_of`.
3092 pat.is_prefix_of(self)
3095 /// Returns `true` if the given pattern matches a suffix of this
3098 /// Returns `false` if it does not.
3100 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3101 /// function or closure that determines if a character matches.
3103 /// [pattern]: self::pattern
3110 /// let bananas = "bananas";
3112 /// assert!(bananas.ends_with("anas"));
3113 /// assert!(!bananas.ends_with("nana"));
3115 #[stable(feature = "rust1", since = "1.0.0")]
3116 pub fn ends_with<'a, P>(&'a self, pat: P) -> bool
// Unlike `starts_with`, the pattern's searcher must support searching from the back.
3118 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
// The check is delegated to the pattern via `Pattern::is_suffix_of`.
3120 pat.is_suffix_of(self)
3123 /// Returns the byte index of the first character of this string slice that
3124 /// matches the pattern.
3126 /// Returns [`None`] if the pattern doesn't match.
3128 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3129 /// function or closure that determines if a character matches.
3131 /// [pattern]: self::pattern
3135 /// Simple patterns:
3138 /// let s = "Löwe 老虎 Léopard Gepardi";
3140 /// assert_eq!(s.find('L'), Some(0));
3141 /// assert_eq!(s.find('é'), Some(14));
3142 /// assert_eq!(s.find("pard"), Some(17));
3145 /// More complex patterns using point-free style and closures:
3148 /// let s = "Löwe 老虎 Léopard";
3150 /// assert_eq!(s.find(char::is_whitespace), Some(5));
3151 /// assert_eq!(s.find(char::is_lowercase), Some(1));
3152 /// assert_eq!(s.find(|c: char| c.is_whitespace() || c.is_lowercase()), Some(1));
3153 /// assert_eq!(s.find(|c: char| (c < 'o') && (c > 'a')), Some(4));
3156 /// Not finding the pattern:
3159 /// let s = "Löwe 老虎 Léopard";
3160 /// let x: &[_] = &['1', '2'];
3162 /// assert_eq!(s.find(x), None);
3164 #[stable(feature = "rust1", since = "1.0.0")]
3166 pub fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
// `next_match` yields the `(start, end)` byte offsets of the first match; only `start` is kept.
3167 pat.into_searcher(self).next_match().map(|(i, _)| i)
3170 /// Returns the byte index for the first character of the rightmost match of the pattern in
3171 /// this string slice.
3173 /// Returns [`None`] if the pattern doesn't match.
3175 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3176 /// function or closure that determines if a character matches.
3178 /// [pattern]: self::pattern
3182 /// Simple patterns:
3185 /// let s = "Löwe 老虎 Léopard Gepardi";
3187 /// assert_eq!(s.rfind('L'), Some(13));
3188 /// assert_eq!(s.rfind('é'), Some(14));
3189 /// assert_eq!(s.rfind("pard"), Some(24));
3192 /// More complex patterns with closures:
3195 /// let s = "Löwe 老虎 Léopard";
3197 /// assert_eq!(s.rfind(char::is_whitespace), Some(12));
3198 /// assert_eq!(s.rfind(char::is_lowercase), Some(20));
3201 /// Not finding the pattern:
3204 /// let s = "Löwe 老虎 Léopard";
3205 /// let x: &[_] = &['1', '2'];
3207 /// assert_eq!(s.rfind(x), None);
3209 #[stable(feature = "rust1", since = "1.0.0")]
3211 pub fn rfind<'a, P>(&'a self, pat: P) -> Option<usize>
3213 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
// `next_match_back` yields the `(start, end)` byte offsets of the last match; the returned
// index is still the *start* of that match, as the `"pard"` example above shows.
3215 pat.into_searcher(self).next_match_back().map(|(i, _)| i)
3218 /// An iterator over substrings of this string slice, separated by
3219 /// characters matched by a pattern.
3221 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3222 /// function or closure that determines if a character matches.
3224 /// [pattern]: self::pattern
3226 /// # Iterator behavior
3228 /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
3229 /// allows a reverse search and forward/reverse search yields the same
3230 /// elements. This is true for, e.g., [`char`], but not for `&str`.
3232 /// If the pattern allows a reverse search but its results might differ
3233 /// from a forward search, the [`rsplit`] method can be used.
3235 /// [`rsplit`]: str::rsplit
3239 /// Simple patterns:
3242 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect();
3243 /// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]);
3245 /// let v: Vec<&str> = "".split('X').collect();
3246 /// assert_eq!(v, [""]);
3248 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
3249 /// assert_eq!(v, ["lion", "", "tiger", "leopard"]);
3251 /// let v: Vec<&str> = "lion::tiger::leopard".split("::").collect();
3252 /// assert_eq!(v, ["lion", "tiger", "leopard"]);
3254 /// let v: Vec<&str> = "abc1def2ghi".split(char::is_numeric).collect();
3255 /// assert_eq!(v, ["abc", "def", "ghi"]);
3257 /// let v: Vec<&str> = "lionXtigerXleopard".split(char::is_uppercase).collect();
3258 /// assert_eq!(v, ["lion", "tiger", "leopard"]);
3261 /// A more complex pattern, using a closure:
3264 /// let v: Vec<&str> = "abc1defXghi".split(|c| c == '1' || c == 'X').collect();
3265 /// assert_eq!(v, ["abc", "def", "ghi"]);
3268 /// If a string contains multiple contiguous separators, you will end up
3269 /// with empty strings in the output:
3272 /// let x = "||||a||b|c".to_string();
3273 /// let d: Vec<_> = x.split('|').collect();
3275 /// assert_eq!(d, &["", "", "", "", "a", "", "b", "c"]);
3278 /// Contiguous separators are separated by the empty string.
3281 /// let x = "(///)".to_string();
3282 /// let d: Vec<_> = x.split('/').collect();
3284 /// assert_eq!(d, &["(", "", "", ")"]);
3287 /// Separators at the start or end of a string are neighbored
3288 /// by empty strings.
3291 /// let d: Vec<_> = "010".split("0").collect();
3292 /// assert_eq!(d, &["", "1", ""]);
3295 /// When the empty string is used as a separator, it separates
3296 /// every character in the string, along with the beginning
3297 /// and end of the string.
3300 /// let f: Vec<_> = "rust".split("").collect();
3301 /// assert_eq!(f, &["", "r", "u", "s", "t", ""]);
3304 /// Contiguous separators can lead to possibly surprising behavior
3305 /// when whitespace is used as the separator. This code is correct:
3308 /// let x = "    a  b c".to_string();
3309 /// let d: Vec<_> = x.split(' ').collect();
3311 /// assert_eq!(d, &["", "", "", "", "a", "", "b", "c"]);
3314 /// It does _not_ give you:
3317 /// assert_eq!(d, &["a", "b", "c"]);
3320 /// Use [`split_whitespace`] for this behavior.
3322 /// [`split_whitespace`]: str::split_whitespace
3323 #[stable(feature = "rust1", since = "1.0.0")]
3325 pub fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> {
3326 Split(SplitInternal {
3329 matcher: pat.into_searcher(self),
// Keep an empty final piece, e.g. `"010".split("0")` ends with `""` (see above);
// `split_terminator` and `split_inclusive` set this to `false` instead.
3330 allow_trailing_empty: true,
3335 /// An iterator over substrings of this string slice, separated by
3336 /// characters matched by a pattern. Differs from the iterator produced by
3337 /// `split` in that `split_inclusive` leaves the matched part as the
3338 /// terminator of the substring.
3340 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3341 /// function or closure that determines if a character matches.
3343 /// [pattern]: self::pattern
3348 /// #![feature(split_inclusive)]
3349 /// let v: Vec<&str> = "Mary had a little lamb\nlittle lamb\nlittle lamb."
3350 /// .split_inclusive('\n').collect();
3351 /// assert_eq!(v, ["Mary had a little lamb\n", "little lamb\n", "little lamb."]);
3354 /// If the last element of the string is matched,
3355 /// that element will be considered the terminator of the preceding substring.
3356 /// That substring will be the last item returned by the iterator.
3359 /// #![feature(split_inclusive)]
3360 /// let v: Vec<&str> = "Mary had a little lamb\nlittle lamb\nlittle lamb.\n"
3361 /// .split_inclusive('\n').collect();
3362 /// assert_eq!(v, ["Mary had a little lamb\n", "little lamb\n", "little lamb.\n"]);
3364 #[unstable(feature = "split_inclusive", issue = "72360")]
3366 pub fn split_inclusive<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitInclusive<'a, P> {
3367 SplitInclusive(SplitInternal {
3370 matcher: pat.into_searcher(self),
// No empty piece after a trailing match: in the second example above the final `\n`
// closes the last item rather than opening a new, empty one.
3371 allow_trailing_empty: false,
3376 /// An iterator over substrings of the given string slice, separated by
3377 /// characters matched by a pattern and yielded in reverse order.
3379 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3380 /// function or closure that determines if a character matches.
3382 /// [pattern]: self::pattern
3384 /// # Iterator behavior
3386 /// The returned iterator requires that the pattern supports a reverse
3387 /// search, and it will be a [`DoubleEndedIterator`] if a forward/reverse
3388 /// search yields the same elements.
3390 /// For iterating from the front, the [`split`] method can be used.
3392 /// [`split`]: str::split
3396 /// Simple patterns:
3399 /// let v: Vec<&str> = "Mary had a little lamb".rsplit(' ').collect();
3400 /// assert_eq!(v, ["lamb", "little", "a", "had", "Mary"]);
3402 /// let v: Vec<&str> = "".rsplit('X').collect();
3403 /// assert_eq!(v, [""]);
3405 /// let v: Vec<&str> = "lionXXtigerXleopard".rsplit('X').collect();
3406 /// assert_eq!(v, ["leopard", "tiger", "", "lion"]);
3408 /// let v: Vec<&str> = "lion::tiger::leopard".rsplit("::").collect();
3409 /// assert_eq!(v, ["leopard", "tiger", "lion"]);
3412 /// A more complex pattern, using a closure:
3415 /// let v: Vec<&str> = "abc1defXghi".rsplit(|c| c == '1' || c == 'X').collect();
3416 /// assert_eq!(v, ["ghi", "def", "abc"]);
3418 #[stable(feature = "rust1", since = "1.0.0")]
3420 pub fn rsplit<'a, P>(&'a self, pat: P) -> RSplit<'a, P>
3422 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
// Reuses the `SplitInternal` state built by `split` (its `.0` field) inside `RSplit`.
// NOTE(review): the reversed order presumably comes from `RSplit`'s iterator impl.
3424 RSplit(self.split(pat).0)
3427 /// An iterator over substrings of the given string slice, separated by
3428 /// characters matched by a pattern.
3430 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3431 /// function or closure that determines if a character matches.
3433 /// [pattern]: self::pattern
3435 /// Equivalent to [`split`], except that the trailing substring
3436 /// is skipped if empty.
3438 /// [`split`]: str::split
3440 /// This method can be used for string data that is _terminated_,
3441 /// rather than _separated_ by a pattern.
3443 /// # Iterator behavior
3445 /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
3446 /// allows a reverse search and forward/reverse search yields the same
3447 /// elements. This is true for, e.g., [`char`], but not for `&str`.
3449 /// If the pattern allows a reverse search but its results might differ
3450 /// from a forward search, the [`rsplit_terminator`] method can be used.
3452 /// [`rsplit_terminator`]: str::rsplit_terminator
3459 /// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
3460 /// assert_eq!(v, ["A", "B"]);
3462 /// let v: Vec<&str> = "A..B..".split_terminator(".").collect();
3463 /// assert_eq!(v, ["A", "", "B", ""]);
3465 #[stable(feature = "rust1", since = "1.0.0")]
3467 pub fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> {
// Same state as `split`, with only `allow_trailing_empty` overridden (struct update syntax)
// so that an empty piece after the final terminator is not yielded.
3468 SplitTerminator(SplitInternal { allow_trailing_empty: false, ..self.split(pat).0 })
3471 /// An iterator over substrings of `self`, separated by characters
3472 /// matched by a pattern and yielded in reverse order.
3474 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3475 /// function or closure that determines if a character matches.
3477 /// [pattern]: self::pattern
3479 /// Equivalent to [`split`], except that the trailing substring is
3480 /// skipped if empty.
3482 /// [`split`]: str::split
3484 /// This method can be used for string data that is _terminated_,
3485 /// rather than _separated_ by a pattern.
3487 /// # Iterator behavior
3489 /// The returned iterator requires that the pattern supports a
3490 /// reverse search, and it will be double ended if a forward/reverse
3491 /// search yields the same elements.
3493 /// For iterating from the front, the [`split_terminator`] method can be
3496 /// [`split_terminator`]: str::split_terminator
3501 /// let v: Vec<&str> = "A.B.".rsplit_terminator('.').collect();
3502 /// assert_eq!(v, ["B", "A"]);
3504 /// let v: Vec<&str> = "A..B..".rsplit_terminator(".").collect();
3505 /// assert_eq!(v, ["", "B", "", "A"]);
3507 #[stable(feature = "rust1", since = "1.0.0")]
3509 pub fn rsplit_terminator<'a, P>(&'a self, pat: P) -> RSplitTerminator<'a, P>
3511 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
// Reuses the state built by `split_terminator` (its `.0` field), so the trailing empty
// piece is suppressed in the same way; `RSplitTerminator` wraps that state.
3513 RSplitTerminator(self.split_terminator(pat).0)
3516 /// An iterator over substrings of the given string slice, separated by a
3517 /// pattern, restricted to returning at most `n` items.
3519 /// If `n` substrings are returned, the last substring (the `n`th substring)
3520 /// will contain the remainder of the string.
3522 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3523 /// function or closure that determines if a character matches.
3525 /// [pattern]: self::pattern
3527 /// # Iterator behavior
3529 /// The returned iterator will not be double ended, because it is
3530 /// not efficient to support.
3532 /// If the pattern allows a reverse search, the [`rsplitn`] method can be
3535 /// [`rsplitn`]: str::rsplitn
3539 /// Simple patterns:
3542 /// let v: Vec<&str> = "Mary had a little lambda".splitn(3, ' ').collect();
3543 /// assert_eq!(v, ["Mary", "had", "a little lambda"]);
3545 /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(3, "X").collect();
3546 /// assert_eq!(v, ["lion", "", "tigerXleopard"]);
3548 /// let v: Vec<&str> = "abcXdef".splitn(1, 'X').collect();
3549 /// assert_eq!(v, ["abcXdef"]);
3551 /// let v: Vec<&str> = "".splitn(1, 'X').collect();
3552 /// assert_eq!(v, [""]);
3555 /// A more complex pattern, using a closure:
3558 /// let v: Vec<&str> = "abc1defXghi".splitn(2, |c| c == '1' || c == 'X').collect();
3559 /// assert_eq!(v, ["abc", "defXghi"]);
3561 #[stable(feature = "rust1", since = "1.0.0")]
3563 pub fn splitn<'a, P: Pattern<'a>>(&'a self, n: usize, pat: P) -> SplitN<'a, P> {
// Wraps the ordinary `split` state together with `n` as the item budget (`count`).
3564 SplitN(SplitNInternal { iter: self.split(pat).0, count: n })
3567 /// An iterator over substrings of this string slice, separated by a
3568 /// pattern, starting from the end of the string, restricted to returning
3569 /// at most `n` items.
3571 /// If `n` substrings are returned, the last substring (the `n`th substring)
3572 /// will contain the remainder of the string.
3574 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3575 /// function or closure that determines if a character matches.
3577 /// [pattern]: self::pattern
3579 /// # Iterator behavior
3581 /// The returned iterator will not be double ended, because it is not
3582 /// efficient to support.
3584 /// For splitting from the front, the [`splitn`] method can be used.
3586 /// [`splitn`]: str::splitn
3590 /// Simple patterns:
3593 /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(3, ' ').collect();
3594 /// assert_eq!(v, ["lamb", "little", "Mary had a"]);
3596 /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(3, 'X').collect();
3597 /// assert_eq!(v, ["leopard", "tiger", "lionX"]);
3599 /// let v: Vec<&str> = "lion::tiger::leopard".rsplitn(2, "::").collect();
3600 /// assert_eq!(v, ["leopard", "lion::tiger"]);
3603 /// A more complex pattern, using a closure:
3606 /// let v: Vec<&str> = "abc1defXghi".rsplitn(2, |c| c == '1' || c == 'X').collect();
3607 /// assert_eq!(v, ["ghi", "abc1def"]);
3609 #[stable(feature = "rust1", since = "1.0.0")]
3611 pub fn rsplitn<'a, P>(&'a self, n: usize, pat: P) -> RSplitN<'a, P>
3613 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
// Reuses the `SplitNInternal` state built by `splitn` (its `.0` field) inside `RSplitN`.
// NOTE(review): the back-to-front order presumably comes from `RSplitN`'s iterator impl.
3615 RSplitN(self.splitn(n, pat).0)
3618 /// Splits the string on the first occurrence of the specified delimiter and
3619 /// returns prefix before delimiter and suffix after delimiter.
///
/// Returns [`None`] if the delimiter does not occur in the string. The delimiter
/// itself is not part of either returned slice.
3624 /// #![feature(str_split_once)]
3626 /// assert_eq!("cfg".split_once('='), None);
3627 /// assert_eq!("cfg=foo".split_once('='), Some(("cfg", "foo")));
3628 /// assert_eq!("cfg=foo=bar".split_once('='), Some(("cfg", "foo=bar")));
3630 #[unstable(feature = "str_split_once", reason = "newly added", issue = "74773")]
3632 pub fn split_once<'a, P: Pattern<'a>>(&'a self, delimiter: P) -> Option<(&'a str, &'a str)> {
// `start..end` is the byte range of the first match; `?` returns `None` when there is none.
3633 let (start, end) = delimiter.into_searcher(self).next_match()?;
3634 Some((&self[..start], &self[end..]))
3637 /// Splits the string on the last occurrence of the specified delimiter and
3638 /// returns prefix before delimiter and suffix after delimiter.
///
/// Returns [`None`] if the delimiter does not occur in the string. The delimiter
/// itself is not part of either returned slice.
3643 /// #![feature(str_split_once)]
3645 /// assert_eq!("cfg".rsplit_once('='), None);
3646 /// assert_eq!("cfg=foo".rsplit_once('='), Some(("cfg", "foo")));
3647 /// assert_eq!("cfg=foo=bar".rsplit_once('='), Some(("cfg=foo", "bar")));
3649 #[unstable(feature = "str_split_once", reason = "newly added", issue = "74773")]
3651 pub fn rsplit_once<'a, P>(&'a self, delimiter: P) -> Option<(&'a str, &'a str)>
3653 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
// `start..end` is the byte range of the last match; `?` returns `None` when there is none.
3655 let (start, end) = delimiter.into_searcher(self).next_match_back()?;
3656 Some((&self[..start], &self[end..]))
3659 /// An iterator over the disjoint matches of a pattern within the given string
3662 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3663 /// function or closure that determines if a character matches.
3665 /// [pattern]: self::pattern
3667 /// # Iterator behavior
3669 /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
3670 /// allows a reverse search and forward/reverse search yields the same
3671 /// elements. This is true for, e.g., [`char`], but not for `&str`.
3673 /// If the pattern allows a reverse search but its results might differ
3674 /// from a forward search, the [`rmatches`] method can be used.
3676 /// [`rmatches`]: str::rmatches
3683 /// let v: Vec<&str> = "abcXXXabcYYYabc".matches("abc").collect();
3684 /// assert_eq!(v, ["abc", "abc", "abc"]);
3686 /// let v: Vec<&str> = "1abc2abc3".matches(char::is_numeric).collect();
3687 /// assert_eq!(v, ["1", "2", "3"]);
3689 #[stable(feature = "str_matches", since = "1.2.0")]
3691 pub fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> {
3692 Matches(MatchesInternal(pat.into_searcher(self)))
3695 /// An iterator over the disjoint matches of a pattern within this string slice,
3696 /// yielded in reverse order.
3698 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3699 /// function or closure that determines if a character matches.
3701 /// [pattern]: self::pattern
3703 /// # Iterator behavior
3705 /// The returned iterator requires that the pattern supports a reverse
3706 /// search, and it will be a [`DoubleEndedIterator`] if a forward/reverse
3707 /// search yields the same elements.
3709 /// For iterating from the front, the [`matches`] method can be used.
3711 /// [`matches`]: str::matches
3718 /// let v: Vec<&str> = "abcXXXabcYYYabc".rmatches("abc").collect();
3719 /// assert_eq!(v, ["abc", "abc", "abc"]);
3721 /// let v: Vec<&str> = "1abc2abc3".rmatches(char::is_numeric).collect();
3722 /// assert_eq!(v, ["3", "2", "1"]);
3724 #[stable(feature = "str_matches", since = "1.2.0")]
3726 pub fn rmatches<'a, P>(&'a self, pat: P) -> RMatches<'a, P>
3728 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
3730 RMatches(self.matches(pat).0)
3733 /// An iterator over the disjoint matches of a pattern within this string
3734 /// slice as well as the index that the match starts at.
3736 /// For matches of `pat` within `self` that overlap, only the indices
3737 /// corresponding to the first match are returned.
3739 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3740 /// function or closure that determines if a character matches.
3742 /// [pattern]: self::pattern
3744 /// # Iterator behavior
3746 /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
3747 /// allows a reverse search and forward/reverse search yields the same
3748 /// elements. This is true for, e.g., [`char`], but not for `&str`.
3750 /// If the pattern allows a reverse search but its results might differ
3751 /// from a forward search, the [`rmatch_indices`] method can be used.
3753 /// [`rmatch_indices`]: str::rmatch_indices
3760 /// let v: Vec<_> = "abcXXXabcYYYabc".match_indices("abc").collect();
3761 /// assert_eq!(v, [(0, "abc"), (6, "abc"), (12, "abc")]);
3763 /// let v: Vec<_> = "1abcabc2".match_indices("abc").collect();
3764 /// assert_eq!(v, [(1, "abc"), (4, "abc")]);
3766 /// let v: Vec<_> = "ababa".match_indices("aba").collect();
3767 /// assert_eq!(v, [(0, "aba")]); // only the first `aba`
3769 #[stable(feature = "str_match_indices", since = "1.5.0")]
3771 pub fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> {
3772 MatchIndices(MatchIndicesInternal(pat.into_searcher(self)))
3775 /// An iterator over the disjoint matches of a pattern within `self`,
3776 /// yielded in reverse order along with the index of the match.
3778 /// For matches of `pat` within `self` that overlap, only the indices
3779 /// corresponding to the last match are returned.
3781 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
3782 /// function or closure that determines if a character matches.
3784 /// [pattern]: self::pattern
3786 /// # Iterator behavior
3788 /// The returned iterator requires that the pattern supports a reverse
3789 /// search, and it will be a [`DoubleEndedIterator`] if a forward/reverse
3790 /// search yields the same elements.
3792 /// For iterating from the front, the [`match_indices`] method can be used.
3794 /// [`match_indices`]: str::match_indices
3801 /// let v: Vec<_> = "abcXXXabcYYYabc".rmatch_indices("abc").collect();
3802 /// assert_eq!(v, [(12, "abc"), (6, "abc"), (0, "abc")]);
3804 /// let v: Vec<_> = "1abcabc2".rmatch_indices("abc").collect();
3805 /// assert_eq!(v, [(4, "abc"), (1, "abc")]);
3807 /// let v: Vec<_> = "ababa".rmatch_indices("aba").collect();
3808 /// assert_eq!(v, [(2, "aba")]); // only the last `aba`
3810 #[stable(feature = "str_match_indices", since = "1.5.0")]
3812 pub fn rmatch_indices<'a, P>(&'a self, pat: P) -> RMatchIndices<'a, P>
3814 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
3816 RMatchIndices(self.match_indices(pat).0)
3819 /// Returns a string slice with leading and trailing whitespace removed.
3821 /// 'Whitespace' is defined according to the terms of the Unicode Derived
3822 /// Core Property `White_Space`.
3829 /// let s = " Hello\tworld\t";
3831 /// assert_eq!("Hello\tworld", s.trim());
3833 #[must_use = "this returns the trimmed string as a slice, \
3834 without modifying the original"]
3835 #[stable(feature = "rust1", since = "1.0.0")]
3836 pub fn trim(&self) -> &str {
3837 self.trim_matches(|c: char| c.is_whitespace())
3840 /// Returns a string slice with leading whitespace removed.
3842 /// 'Whitespace' is defined according to the terms of the Unicode Derived
3843 /// Core Property `White_Space`.
3845 /// # Text directionality
3847 /// A string is a sequence of bytes. `start` in this context means the first
3848 /// position of that byte string; for a left-to-right language like English or
3849 /// Russian, this will be the left side, and for right-to-left languages like
3850 /// Arabic or Hebrew, this will be the right side.
3857 /// let s = " Hello\tworld\t";
3858 /// assert_eq!("Hello\tworld\t", s.trim_start());
3864 /// let s = " English ";
3865 /// assert!(Some('E') == s.trim_start().chars().next());
3867 /// let s = " עברית ";
3868 /// assert!(Some('ע') == s.trim_start().chars().next());
3870 #[must_use = "this returns the trimmed string as a new slice, \
3871 without modifying the original"]
3872 #[stable(feature = "trim_direction", since = "1.30.0")]
3873 pub fn trim_start(&self) -> &str {
3874 self.trim_start_matches(|c: char| c.is_whitespace())
3877 /// Returns a string slice with trailing whitespace removed.
3879 /// 'Whitespace' is defined according to the terms of the Unicode Derived
3880 /// Core Property `White_Space`.
3882 /// # Text directionality
3884 /// A string is a sequence of bytes. `end` in this context means the last
3885 /// position of that byte string; for a left-to-right language like English or
3886 /// Russian, this will be the right side, and for right-to-left languages like
3887 /// Arabic or Hebrew, this will be the left side.
3894 /// let s = " Hello\tworld\t";
3895 /// assert_eq!(" Hello\tworld", s.trim_end());
3901 /// let s = " English ";
3902 /// assert!(Some('h') == s.trim_end().chars().rev().next());
3904 /// let s = " עברית ";
3905 /// assert!(Some('ת') == s.trim_end().chars().rev().next());
3907 #[must_use = "this returns the trimmed string as a new slice, \
3908 without modifying the original"]
3909 #[stable(feature = "trim_direction", since = "1.30.0")]
3910 pub fn trim_end(&self) -> &str {
3911 self.trim_end_matches(|c: char| c.is_whitespace())
3914 /// Returns a string slice with leading whitespace removed.
3916 /// 'Whitespace' is defined according to the terms of the Unicode Derived
3917 /// Core Property `White_Space`.
3919 /// # Text directionality
3921 /// A string is a sequence of bytes. 'Left' in this context means the first
3922 /// position of that byte string; for a language like Arabic or Hebrew
3923 /// which are 'right to left' rather than 'left to right', this will be
3924 /// the _right_ side, not the left.
3931 /// let s = " Hello\tworld\t";
3933 /// assert_eq!("Hello\tworld\t", s.trim_left());
3939 /// let s = " English";
3940 /// assert!(Some('E') == s.trim_left().chars().next());
3942 /// let s = " עברית";
3943 /// assert!(Some('ע') == s.trim_left().chars().next());
3945 #[stable(feature = "rust1", since = "1.0.0")]
3948 reason = "superseded by `trim_start`",
3949 suggestion = "trim_start"
3951 pub fn trim_left(&self) -> &str {
3955 /// Returns a string slice with trailing whitespace removed.
3957 /// 'Whitespace' is defined according to the terms of the Unicode Derived
3958 /// Core Property `White_Space`.
3960 /// # Text directionality
3962 /// A string is a sequence of bytes. 'Right' in this context means the last
3963 /// position of that byte string; for a language like Arabic or Hebrew
3964 /// which are 'right to left' rather than 'left to right', this will be
3965 /// the _left_ side, not the right.
3972 /// let s = " Hello\tworld\t";
3974 /// assert_eq!(" Hello\tworld", s.trim_right());
3980 /// let s = "English ";
3981 /// assert!(Some('h') == s.trim_right().chars().rev().next());
3983 /// let s = "עברית ";
3984 /// assert!(Some('ת') == s.trim_right().chars().rev().next());
3986 #[stable(feature = "rust1", since = "1.0.0")]
3989 reason = "superseded by `trim_end`",
3990 suggestion = "trim_end"
3992 pub fn trim_right(&self) -> &str {
3996 /// Returns a string slice with all prefixes and suffixes that match a
3997 /// pattern repeatedly removed.
3999 /// The [pattern] can be a [`char`], a slice of [`char`]s, or a function
4000 /// or closure that determines if a character matches.
4002 /// [pattern]: self::pattern
4006 /// Simple patterns:
4009 /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
4010 /// assert_eq!("123foo1bar123".trim_matches(char::is_numeric), "foo1bar");
4012 /// let x: &[_] = &['1', '2'];
4013 /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar");
4016 /// A more complex pattern, using a closure:
4019 /// assert_eq!("1foo1barXX".trim_matches(|c| c == '1' || c == 'X'), "foo1bar");
4021 #[must_use = "this returns the trimmed string as a new slice, \
4022 without modifying the original"]
4023 #[stable(feature = "rust1", since = "1.0.0")]
4024 pub fn trim_matches<'a, P>(&'a self, pat: P) -> &'a str
4026 P: Pattern<'a, Searcher: DoubleEndedSearcher<'a>>,
4030 let mut matcher = pat.into_searcher(self);
4031 if let Some((a, b)) = matcher.next_reject() {
4033 j = b; // Remember earliest known match, correct it below if
4034 // last match is different
4036 if let Some((_, b)) = matcher.next_reject_back() {
4039 // SAFETY: `Searcher` is known to return valid indices.
4040 unsafe { self.get_unchecked(i..j) }
4043 /// Returns a string slice with all prefixes that match a pattern
4044 /// repeatedly removed.
4046 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
4047 /// function or closure that determines if a character matches.
4049 /// [pattern]: self::pattern
4051 /// # Text directionality
4053 /// A string is a sequence of bytes. `start` in this context means the first
4054 /// position of that byte string; for a left-to-right language like English or
4055 /// Russian, this will be the left side, and for right-to-left languages like
4056 /// Arabic or Hebrew, this will be the right side.
4063 /// assert_eq!("11foo1bar11".trim_start_matches('1'), "foo1bar11");
4064 /// assert_eq!("123foo1bar123".trim_start_matches(char::is_numeric), "foo1bar123");
4066 /// let x: &[_] = &['1', '2'];
4067 /// assert_eq!("12foo1bar12".trim_start_matches(x), "foo1bar12");
4069 #[must_use = "this returns the trimmed string as a new slice, \
4070 without modifying the original"]
4071 #[stable(feature = "trim_direction", since = "1.30.0")]
4072 pub fn trim_start_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
4073 let mut i = self.len();
4074 let mut matcher = pat.into_searcher(self);
4075 if let Some((a, _)) = matcher.next_reject() {
4078 // SAFETY: `Searcher` is known to return valid indices.
4079 unsafe { self.get_unchecked(i..self.len()) }
4082 /// Returns a string slice with the prefix removed.
4084 /// If the string starts with the pattern `prefix`, `Some` is returned with the substring where
4085 /// the prefix is removed. Unlike `trim_start_matches`, this method removes the prefix exactly
4088 /// If the string does not start with `prefix`, `None` is returned.
4090 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
4091 /// function or closure that determines if a character matches.
4093 /// [pattern]: self::pattern
4098 /// assert_eq!("foo:bar".strip_prefix("foo:"), Some("bar"));
4099 /// assert_eq!("foo:bar".strip_prefix("bar"), None);
4100 /// assert_eq!("foofoo".strip_prefix("foo"), Some("foo"));
4102 #[must_use = "this returns the remaining substring as a new slice, \
4103 without modifying the original"]
4104 #[stable(feature = "str_strip", since = "1.45.0")]
4105 pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a str> {
4106 prefix.strip_prefix_of(self)
4109 /// Returns a string slice with the suffix removed.
4111 /// If the string ends with the pattern `suffix`, `Some` is returned with the substring where
4112 /// the suffix is removed. Unlike `trim_end_matches`, this method removes the suffix exactly
4115 /// If the string does not end with `suffix`, `None` is returned.
4117 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
4118 /// function or closure that determines if a character matches.
4120 /// [pattern]: self::pattern
4125 /// assert_eq!("bar:foo".strip_suffix(":foo"), Some("bar"));
4126 /// assert_eq!("bar:foo".strip_suffix("bar"), None);
4127 /// assert_eq!("foofoo".strip_suffix("foo"), Some("foo"));
4129 #[must_use = "this returns the remaining substring as a new slice, \
4130 without modifying the original"]
4131 #[stable(feature = "str_strip", since = "1.45.0")]
4132 pub fn strip_suffix<'a, P>(&'a self, suffix: P) -> Option<&'a str>
4135 <P as Pattern<'a>>::Searcher: ReverseSearcher<'a>,
4137 suffix.strip_suffix_of(self)
4140 /// Returns a string slice with all suffixes that match a pattern
4141 /// repeatedly removed.
4143 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
4144 /// function or closure that determines if a character matches.
4146 /// [pattern]: self::pattern
4148 /// # Text directionality
4150 /// A string is a sequence of bytes. `end` in this context means the last
4151 /// position of that byte string; for a left-to-right language like English or
4152 /// Russian, this will be the right side, and for right-to-left languages like
4153 /// Arabic or Hebrew, this will be the left side.
4157 /// Simple patterns:
4160 /// assert_eq!("11foo1bar11".trim_end_matches('1'), "11foo1bar");
4161 /// assert_eq!("123foo1bar123".trim_end_matches(char::is_numeric), "123foo1bar");
4163 /// let x: &[_] = &['1', '2'];
4164 /// assert_eq!("12foo1bar12".trim_end_matches(x), "12foo1bar");
4167 /// A more complex pattern, using a closure:
4170 /// assert_eq!("1fooX".trim_end_matches(|c| c == '1' || c == 'X'), "1foo");
4172 #[must_use = "this returns the trimmed string as a new slice, \
4173 without modifying the original"]
4174 #[stable(feature = "trim_direction", since = "1.30.0")]
4175 pub fn trim_end_matches<'a, P>(&'a self, pat: P) -> &'a str
4177 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
4180 let mut matcher = pat.into_searcher(self);
4181 if let Some((_, b)) = matcher.next_reject_back() {
4184 // SAFETY: `Searcher` is known to return valid indices.
4185 unsafe { self.get_unchecked(0..j) }
4188 /// Returns a string slice with all prefixes that match a pattern
4189 /// repeatedly removed.
4191 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
4192 /// function or closure that determines if a character matches.
4194 /// [pattern]: self::pattern
4196 /// # Text directionality
4198 /// A string is a sequence of bytes. 'Left' in this context means the first
4199 /// position of that byte string; for a language like Arabic or Hebrew
4200 /// which are 'right to left' rather than 'left to right', this will be
4201 /// the _right_ side, not the left.
4208 /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
4209 /// assert_eq!("123foo1bar123".trim_left_matches(char::is_numeric), "foo1bar123");
4211 /// let x: &[_] = &['1', '2'];
4212 /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12");
4214 #[stable(feature = "rust1", since = "1.0.0")]
4217 reason = "superseded by `trim_start_matches`",
4218 suggestion = "trim_start_matches"
4220 pub fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
4221 self.trim_start_matches(pat)
4224 /// Returns a string slice with all suffixes that match a pattern
4225 /// repeatedly removed.
4227 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
4228 /// function or closure that determines if a character matches.
4230 /// [pattern]: self::pattern
4232 /// # Text directionality
4234 /// A string is a sequence of bytes. 'Right' in this context means the last
4235 /// position of that byte string; for a language like Arabic or Hebrew
4236 /// which are 'right to left' rather than 'left to right', this will be
4237 /// the _left_ side, not the right.
4241 /// Simple patterns:
4244 /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
4245 /// assert_eq!("123foo1bar123".trim_right_matches(char::is_numeric), "123foo1bar");
4247 /// let x: &[_] = &['1', '2'];
4248 /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar");
4251 /// A more complex pattern, using a closure:
4254 /// assert_eq!("1fooX".trim_right_matches(|c| c == '1' || c == 'X'), "1foo");
4256 #[stable(feature = "rust1", since = "1.0.0")]
4259 reason = "superseded by `trim_end_matches`",
4260 suggestion = "trim_end_matches"
4262 pub fn trim_right_matches<'a, P>(&'a self, pat: P) -> &'a str
4264 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
4266 self.trim_end_matches(pat)
4269 /// Parses this string slice into another type.
4271 /// Because `parse` is so general, it can cause problems with type
4272 /// inference. As such, `parse` is one of the few times you'll see
4273 /// the syntax affectionately known as the 'turbofish': `::<>`. This
4274 /// helps the inference algorithm understand specifically which type
4275 /// you're trying to parse into.
4277 /// `parse` can parse any type that implements the [`FromStr`] trait.
4282 /// Will return [`Err`] if it's not possible to parse this string slice into
4283 /// the desired type.
4285 /// [`Err`]: FromStr::Err
4292 /// let four: u32 = "4".parse().unwrap();
4294 /// assert_eq!(4, four);
4297 /// Using the 'turbofish' instead of annotating `four`:
4300 /// let four = "4".parse::<u32>();
4302 /// assert_eq!(Ok(4), four);
4305 /// Failing to parse:
4308 /// let nope = "j".parse::<u32>();
4310 /// assert!(nope.is_err());
4313 #[stable(feature = "rust1", since = "1.0.0")]
4314 pub fn parse<F: FromStr>(&self) -> Result<F, F::Err> {
4315 FromStr::from_str(self)
4318 /// Checks if all characters in this string are within the ASCII range.
4323 /// let ascii = "hello!\n";
4324 /// let non_ascii = "Grüße, Jürgen ❤";
4326 /// assert!(ascii.is_ascii());
4327 /// assert!(!non_ascii.is_ascii());
4329 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
4331 pub fn is_ascii(&self) -> bool {
4332 // We can treat each byte as character here: all multibyte characters
4333 // start with a byte that is not in the ascii range, so we will stop
4335 self.as_bytes().is_ascii()
4338 /// Checks that two strings are an ASCII case-insensitive match.
4340 /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
4341 /// but without allocating and copying temporaries.
4346 /// assert!("Ferris".eq_ignore_ascii_case("FERRIS"));
4347 /// assert!("Ferrös".eq_ignore_ascii_case("FERRöS"));
4348 /// assert!(!"Ferrös".eq_ignore_ascii_case("FERRÖS"));
4350 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
4352 pub fn eq_ignore_ascii_case(&self, other: &str) -> bool {
4353 self.as_bytes().eq_ignore_ascii_case(other.as_bytes())
4356 /// Converts this string to its ASCII upper case equivalent in-place.
4358 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
4359 /// but non-ASCII letters are unchanged.
4361 /// To return a new uppercased value without modifying the existing one, use
4362 /// [`to_ascii_uppercase`].
4364 /// [`to_ascii_uppercase`]: #method.to_ascii_uppercase
4369 /// let mut s = String::from("Grüße, Jürgen ❤");
4371 /// s.make_ascii_uppercase();
4373 /// assert_eq!("GRüßE, JüRGEN ❤", s);
4375 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
4376 pub fn make_ascii_uppercase(&mut self) {
4377 // SAFETY: changing ASCII letters only does not invalidate UTF-8.
4378 let me = unsafe { self.as_bytes_mut() };
4379 me.make_ascii_uppercase()
4382 /// Converts this string to its ASCII lower case equivalent in-place.
4384 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
4385 /// but non-ASCII letters are unchanged.
4387 /// To return a new lowercased value without modifying the existing one, use
4388 /// [`to_ascii_lowercase`].
4390 /// [`to_ascii_lowercase`]: #method.to_ascii_lowercase
4395 /// let mut s = String::from("GRÜßE, JÜRGEN ❤");
4397 /// s.make_ascii_lowercase();
4399 /// assert_eq!("grÜße, jÜrgen ❤", s);
4401 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
4402 pub fn make_ascii_lowercase(&mut self) {
4403 // SAFETY: changing ASCII letters only does not invalidate UTF-8.
4404 let me = unsafe { self.as_bytes_mut() };
4405 me.make_ascii_lowercase()
4408 /// Return an iterator that escapes each char in `self` with [`char::escape_debug`].
4410 /// Note: only extended grapheme codepoints that begin the string will be
4418 /// for c in "❤\n!".escape_debug() {
4419 /// print!("{}", c);
4424 /// Using `println!` directly:
4427 /// println!("{}", "❤\n!".escape_debug());
4431 /// Both are equivalent to:
4434 /// println!("❤\\n!");
4437 /// Using `to_string`:
4440 /// assert_eq!("❤\n!".escape_debug().to_string(), "❤\\n!");
4442 #[stable(feature = "str_escape", since = "1.34.0")]
4443 pub fn escape_debug(&self) -> EscapeDebug<'_> {
4444 let mut chars = self.chars();
4448 .map(|first| first.escape_debug_ext(true))
4451 .chain(chars.flat_map(CharEscapeDebugContinue)),
4455 /// Return an iterator that escapes each char in `self` with [`char::escape_default`].
4462 /// for c in "❤\n!".escape_default() {
4463 /// print!("{}", c);
4468 /// Using `println!` directly:
4471 /// println!("{}", "❤\n!".escape_default());
4475 /// Both are equivalent to:
4478 /// println!("\\u{{2764}}\\n!");
4481 /// Using `to_string`:
4484 /// assert_eq!("❤\n!".escape_default().to_string(), "\\u{2764}\\n!");
4486 #[stable(feature = "str_escape", since = "1.34.0")]
4487 pub fn escape_default(&self) -> EscapeDefault<'_> {
4488 EscapeDefault { inner: self.chars().flat_map(CharEscapeDefault) }
4491 /// Return an iterator that escapes each char in `self` with [`char::escape_unicode`].
4498 /// for c in "❤\n!".escape_unicode() {
4499 /// print!("{}", c);
4504 /// Using `println!` directly:
4507 /// println!("{}", "❤\n!".escape_unicode());
4511 /// Both are equivalent to:
4514 /// println!("\\u{{2764}}\\u{{a}}\\u{{21}}");
4517 /// Using `to_string`:
4520 /// assert_eq!("❤\n!".escape_unicode().to_string(), "\\u{2764}\\u{a}\\u{21}");
4522 #[stable(feature = "str_escape", since = "1.34.0")]
4523 pub fn escape_unicode(&self) -> EscapeUnicode<'_> {
4524 EscapeUnicode { inner: self.chars().flat_map(CharEscapeUnicode) }
4530 struct CharEscapeDebugContinue impl Fn = |c: char| -> char::EscapeDebug {
4531 c.escape_debug_ext(false)
4535 struct CharEscapeUnicode impl Fn = |c: char| -> char::EscapeUnicode {
4539 struct CharEscapeDefault impl Fn = |c: char| -> char::EscapeDefault {
4544 #[stable(feature = "rust1", since = "1.0.0")]
4545 impl AsRef<[u8]> for str {
4547 fn as_ref(&self) -> &[u8] {
4552 #[stable(feature = "rust1", since = "1.0.0")]
4553 impl Default for &str {
4554 /// Creates an empty str
4555 fn default() -> Self {
4560 #[stable(feature = "default_mut_str", since = "1.28.0")]
4561 impl Default for &mut str {
4562 /// Creates an empty mutable str
4563 fn default() -> Self {
4564 // SAFETY: The empty string is valid UTF-8.
4565 unsafe { from_utf8_unchecked_mut(&mut []) }
4569 /// An iterator over the non-whitespace substrings of a string,
4570 /// separated by any amount of whitespace.
4572 /// This struct is created by the [`split_whitespace`] method on [`str`].
4573 /// See its documentation for more.
4575 /// [`split_whitespace`]: str::split_whitespace
4576 #[stable(feature = "split_whitespace", since = "1.1.0")]
4577 #[derive(Clone, Debug)]
4578 pub struct SplitWhitespace<'a> {
4579 inner: Filter<Split<'a, IsWhitespace>, IsNotEmpty>,
4582 /// An iterator over the non-ASCII-whitespace substrings of a string,
4583 /// separated by any amount of ASCII whitespace.
4585 /// This struct is created by the [`split_ascii_whitespace`] method on [`str`].
4586 /// See its documentation for more.
4588 /// [`split_ascii_whitespace`]: str::split_ascii_whitespace
4589 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
4590 #[derive(Clone, Debug)]
4591 pub struct SplitAsciiWhitespace<'a> {
4592 inner: Map<Filter<SliceSplit<'a, u8, IsAsciiWhitespace>, BytesIsNotEmpty>, UnsafeBytesToStr>,
4595 /// An iterator over the substrings of a string,
4596 /// terminated by a substring matching a predicate function.
4597 /// Unlike `Split`, it contains the matched part as a terminator
4598 /// of the subslice.
4600 /// This struct is created by the [`split_inclusive`] method on [`str`].
4601 /// See its documentation for more.
4603 /// [`split_inclusive`]: str::split_inclusive
4604 #[unstable(feature = "split_inclusive", issue = "72360")]
4605 pub struct SplitInclusive<'a, P: Pattern<'a>>(SplitInternal<'a, P>);
4609 struct IsWhitespace impl Fn = |c: char| -> bool {
4614 struct IsAsciiWhitespace impl Fn = |byte: &u8| -> bool {
4615 byte.is_ascii_whitespace()
4619 struct IsNotEmpty impl<'a, 'b> Fn = |s: &'a &'b str| -> bool {
4624 struct BytesIsNotEmpty impl<'a, 'b> Fn = |s: &'a &'b [u8]| -> bool {
4629 struct UnsafeBytesToStr impl<'a> Fn = |bytes: &'a [u8]| -> &'a str {
4631 unsafe { from_utf8_unchecked(bytes) }
4635 #[stable(feature = "split_whitespace", since = "1.1.0")]
4636 impl<'a> Iterator for SplitWhitespace<'a> {
4637 type Item = &'a str;
4640 fn next(&mut self) -> Option<&'a str> {
4645 fn size_hint(&self) -> (usize, Option<usize>) {
4646 self.inner.size_hint()
4650 fn last(mut self) -> Option<&'a str> {
4655 #[stable(feature = "split_whitespace", since = "1.1.0")]
4656 impl<'a> DoubleEndedIterator for SplitWhitespace<'a> {
4658 fn next_back(&mut self) -> Option<&'a str> {
4659 self.inner.next_back()
4663 #[stable(feature = "fused", since = "1.26.0")]
4664 impl FusedIterator for SplitWhitespace<'_> {}
4666 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
4667 impl<'a> Iterator for SplitAsciiWhitespace<'a> {
4668 type Item = &'a str;
4671 fn next(&mut self) -> Option<&'a str> {
4676 fn size_hint(&self) -> (usize, Option<usize>) {
4677 self.inner.size_hint()
4681 fn last(mut self) -> Option<&'a str> {
4686 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
4687 impl<'a> DoubleEndedIterator for SplitAsciiWhitespace<'a> {
4689 fn next_back(&mut self) -> Option<&'a str> {
4690 self.inner.next_back()
4694 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
4695 impl FusedIterator for SplitAsciiWhitespace<'_> {}
4697 #[unstable(feature = "split_inclusive", issue = "72360")]
4698 impl<'a, P: Pattern<'a>> Iterator for SplitInclusive<'a, P> {
4699 type Item = &'a str;
4702 fn next(&mut self) -> Option<&'a str> {
4703 self.0.next_inclusive()
4707 #[unstable(feature = "split_inclusive", issue = "72360")]
4708 impl<'a, P: Pattern<'a, Searcher: fmt::Debug>> fmt::Debug for SplitInclusive<'a, P> {
4709 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4710 f.debug_struct("SplitInclusive").field("0", &self.0).finish()
4714 // FIXME(#26925) Remove in favor of `#[derive(Clone)]`
4715 #[unstable(feature = "split_inclusive", issue = "72360")]
4716 impl<'a, P: Pattern<'a, Searcher: Clone>> Clone for SplitInclusive<'a, P> {
4717 fn clone(&self) -> Self {
4718 SplitInclusive(self.0.clone())
4722 #[unstable(feature = "split_inclusive", issue = "72360")]
4723 impl<'a, P: Pattern<'a, Searcher: ReverseSearcher<'a>>> DoubleEndedIterator
4724 for SplitInclusive<'a, P>
4727 fn next_back(&mut self) -> Option<&'a str> {
4728 self.0.next_back_inclusive()
4732 #[unstable(feature = "split_inclusive", issue = "72360")]
4733 impl<'a, P: Pattern<'a>> FusedIterator for SplitInclusive<'a, P> {}
4735 /// An iterator of [`u16`] over the string encoded as UTF-16.
4737 /// This struct is created by the [`encode_utf16`] method on [`str`].
4738 /// See its documentation for more.
4740 /// [`encode_utf16`]: str::encode_utf16
4742 #[stable(feature = "encode_utf16", since = "1.8.0")]
4743 pub struct EncodeUtf16<'a> {
4748 #[stable(feature = "collection_debug", since = "1.17.0")]
4749 impl fmt::Debug for EncodeUtf16<'_> {
4750 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4751 f.pad("EncodeUtf16 { .. }")
4755 #[stable(feature = "encode_utf16", since = "1.8.0")]
4756 impl<'a> Iterator for EncodeUtf16<'a> {
4760 fn next(&mut self) -> Option<u16> {
4761 if self.extra != 0 {
4762 let tmp = self.extra;
4767 let mut buf = [0; 2];
4768 self.chars.next().map(|ch| {
4769 let n = ch.encode_utf16(&mut buf).len();
4771 self.extra = buf[1];
4778 fn size_hint(&self) -> (usize, Option<usize>) {
4779 let (low, high) = self.chars.size_hint();
4780 // every char gets either one u16 or two u16,
4781 // so this iterator is between 1 and 2 times as
4782 // long as the underlying iterator.
4783 (low, high.and_then(|n| n.checked_mul(2)))
4787 #[stable(feature = "fused", since = "1.26.0")]
4788 impl FusedIterator for EncodeUtf16<'_> {}
4790 /// The return type of [`str::escape_debug`].
4791 #[stable(feature = "str_escape", since = "1.34.0")]
4792 #[derive(Clone, Debug)]
4793 pub struct EscapeDebug<'a> {
4795 Flatten<option::IntoIter<char::EscapeDebug>>,
4796 FlatMap<Chars<'a>, char::EscapeDebug, CharEscapeDebugContinue>,
4800 /// The return type of [`str::escape_default`].
4801 #[stable(feature = "str_escape", since = "1.34.0")]
4802 #[derive(Clone, Debug)]
4803 pub struct EscapeDefault<'a> {
4804 inner: FlatMap<Chars<'a>, char::EscapeDefault, CharEscapeDefault>,
4807 /// The return type of [`str::escape_unicode`].
4808 #[stable(feature = "str_escape", since = "1.34.0")]
4809 #[derive(Clone, Debug)]
4810 pub struct EscapeUnicode<'a> {
4811 inner: FlatMap<Chars<'a>, char::EscapeUnicode, CharEscapeUnicode>,
4814 macro_rules! escape_types_impls {
4815 ($( $Name: ident ),+) => {$(
4816 #[stable(feature = "str_escape", since = "1.34.0")]
4817 impl<'a> fmt::Display for $Name<'a> {
4818 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4819 self.clone().try_for_each(|c| f.write_char(c))
4823 #[stable(feature = "str_escape", since = "1.34.0")]
4824 impl<'a> Iterator for $Name<'a> {
4828 fn next(&mut self) -> Option<char> { self.inner.next() }
4831 fn size_hint(&self) -> (usize, Option<usize>) { self.inner.size_hint() }
4834 fn try_fold<Acc, Fold, R>(&mut self, init: Acc, fold: Fold) -> R where
4835 Self: Sized, Fold: FnMut(Acc, Self::Item) -> R, R: Try<Ok=Acc>
4837 self.inner.try_fold(init, fold)
4841 fn fold<Acc, Fold>(self, init: Acc, fold: Fold) -> Acc
4842 where Fold: FnMut(Acc, Self::Item) -> Acc,
4844 self.inner.fold(init, fold)
4848 #[stable(feature = "str_escape", since = "1.34.0")]
4849 impl<'a> FusedIterator for $Name<'a> {}
4853 escape_types_impls!(EscapeDebug, EscapeDefault, EscapeUnicode);