1 // ignore-tidy-filelength
3 //! String manipulation.
5 //! For more details, see the `std::str` module.
7 #![stable(feature = "rust1", since = "1.0.0")]
9 use self::pattern::Pattern;
10 use self::pattern::{DoubleEndedSearcher, ReverseSearcher, SearchStep, Searcher};
13 use crate::fmt::{self, Write};
14 use crate::iter::{Chain, FlatMap, Flatten};
15 use crate::iter::{Cloned, Filter, FusedIterator, Map, TrustedLen, TrustedRandomAccess};
19 use crate::slice::{self, SliceIndex, Split as SliceSplit};
23 #[unstable(feature = "str_internals", issue = "none")]
24 #[allow(missing_docs)]
27 /// Parse a value from a string
29 /// `FromStr`'s [`from_str`] method is often used implicitly, through
30 /// [`str`]'s [`parse`] method. See [`parse`]'s documentation for examples.
32 /// [`from_str`]: #tymethod.from_str
33 /// [`str`]: ../../std/primitive.str.html
34 /// [`parse`]: ../../std/primitive.str.html#method.parse
36 /// `FromStr` does not have a lifetime parameter, and so you can only parse types
37 /// that do not contain a lifetime parameter themselves. In other words, you can
38 /// parse an `i32` with `FromStr`, but not a `&i32`. You can parse a struct that
39 /// contains an `i32`, but not one that contains an `&i32`.
43 /// Basic implementation of `FromStr` on an example `Point` type:
46 /// use std::str::FromStr;
47 /// use std::num::ParseIntError;
49 /// #[derive(Debug, PartialEq)]
55 /// impl FromStr for Point {
56 /// type Err = ParseIntError;
58 /// fn from_str(s: &str) -> Result<Self, Self::Err> {
59 /// let coords: Vec<&str> = s.trim_matches(|p| p == '(' || p == ')' )
63 /// let x_fromstr = coords[0].parse::<i32>()?;
64 /// let y_fromstr = coords[1].parse::<i32>()?;
66 /// Ok(Point { x: x_fromstr, y: y_fromstr })
70 /// let p = Point::from_str("(1,2)");
71 /// assert_eq!(p.unwrap(), Point{ x: 1, y: 2} )
73 #[stable(feature = "rust1", since = "1.0.0")]
74 pub trait FromStr: Sized {
75 /// The associated error which can be returned from parsing.
76 #[stable(feature = "rust1", since = "1.0.0")]
79 /// Parses a string `s` to return a value of this type.
81 /// If parsing succeeds, return the value inside [`Ok`], otherwise
82 /// when the string is ill-formatted return an error inside
83 /// [`Err`]. The error type is specific to the implementation of the trait.
85 /// [`Ok`]: ../../std/result/enum.Result.html#variant.Ok
86 /// [`Err`]: ../../std/result/enum.Result.html#variant.Err
90 /// Basic usage with [`i32`][ithirtytwo], a type that implements `FromStr`:
92 /// [ithirtytwo]: ../../std/primitive.i32.html
95 /// use std::str::FromStr;
98 /// let x = i32::from_str(s).unwrap();
100 /// assert_eq!(5, x);
102 #[stable(feature = "rust1", since = "1.0.0")]
103 fn from_str(s: &str) -> Result<Self, Self::Err>;
106 #[stable(feature = "rust1", since = "1.0.0")]
107 impl FromStr for bool {
108 type Err = ParseBoolError;
110 /// Parse a `bool` from a string.
112 /// Yields a `Result<bool, ParseBoolError>`, because `s` may or may not
113 /// actually be parseable.
118 /// use std::str::FromStr;
120 /// assert_eq!(FromStr::from_str("true"), Ok(true));
121 /// assert_eq!(FromStr::from_str("false"), Ok(false));
122 /// assert!(<bool as FromStr>::from_str("not even a boolean").is_err());
125 /// Note, in many cases, the `.parse()` method on `str` is more proper.
128 /// assert_eq!("true".parse(), Ok(true));
129 /// assert_eq!("false".parse(), Ok(false));
130 /// assert!("not even a boolean".parse::<bool>().is_err());
133 fn from_str(s: &str) -> Result<bool, ParseBoolError> {
136 "false" => Ok(false),
137 _ => Err(ParseBoolError { _priv: () }),
142 /// An error returned when parsing a `bool` using [`from_str`] fails.
144 /// [`from_str`]: ../../std/primitive.bool.html#method.from_str
145 #[derive(Debug, Clone, PartialEq, Eq)]
146 #[stable(feature = "rust1", since = "1.0.0")]
147 pub struct ParseBoolError {
151 #[stable(feature = "rust1", since = "1.0.0")]
152 impl fmt::Display for ParseBoolError {
153 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
154 "provided string was not `true` or `false`".fmt(f)
159 Section: Creating a string
162 /// Errors which can occur when attempting to interpret a sequence of [`u8`]
165 /// [`u8`]: ../../std/primitive.u8.html
167 /// As such, the `from_utf8` family of functions and methods for both [`String`]s
168 /// and [`&str`]s make use of this error, for example.
170 /// [`String`]: ../../std/string/struct.String.html#method.from_utf8
171 /// [`&str`]: ../../std/str/fn.from_utf8.html
175 /// This error type’s methods can be used to create functionality
176 /// similar to `String::from_utf8_lossy` without allocating heap memory:
179 /// fn from_utf8_lossy<F>(mut input: &[u8], mut push: F) where F: FnMut(&str) {
181 /// match std::str::from_utf8(input) {
187 /// let (valid, after_valid) = input.split_at(error.valid_up_to());
189 /// push(std::str::from_utf8_unchecked(valid))
191 /// push("\u{FFFD}");
193 /// if let Some(invalid_sequence_length) = error.error_len() {
194 /// input = &after_valid[invalid_sequence_length..]
203 #[derive(Copy, Eq, PartialEq, Clone, Debug)]
204 #[stable(feature = "rust1", since = "1.0.0")]
205 pub struct Utf8Error {
207 error_len: Option<u8>,
211 /// Returns the index in the given string up to which valid UTF-8 was
214 /// It is the maximum index such that `from_utf8(&input[..index])`
215 /// would return `Ok(_)`.
224 /// // some invalid bytes, in a vector
225 /// let sparkle_heart = vec![0, 159, 146, 150];
227 /// // std::str::from_utf8 returns a Utf8Error
228 /// let error = str::from_utf8(&sparkle_heart).unwrap_err();
230 /// // the second byte is invalid here
231 /// assert_eq!(1, error.valid_up_to());
233 #[stable(feature = "utf8_error", since = "1.5.0")]
234 pub fn valid_up_to(&self) -> usize {
238 /// Provides more information about the failure:
240 /// * `None`: the end of the input was reached unexpectedly.
241 /// `self.valid_up_to()` is 1 to 3 bytes from the end of the input.
242 /// If a byte stream (such as a file or a network socket) is being decoded incrementally,
243 /// this could be a valid `char` whose UTF-8 byte sequence is spanning multiple chunks.
245 /// * `Some(len)`: an unexpected byte was encountered.
246 /// The length provided is that of the invalid byte sequence
247 /// that starts at the index given by `valid_up_to()`.
248 /// Decoding should resume after that sequence
249 /// (after inserting a [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD]) in case of
252 /// [U+FFFD]: ../../std/char/constant.REPLACEMENT_CHARACTER.html
253 #[stable(feature = "utf8_error_error_len", since = "1.20.0")]
254 pub fn error_len(&self) -> Option<usize> {
255 self.error_len.map(|len| len as usize)
259 /// Converts a slice of bytes to a string slice.
261 /// A string slice ([`&str`]) is made of bytes ([`u8`]), and a byte slice
262 /// ([`&[u8]`][byteslice]) is made of bytes, so this function converts between
263 /// the two. Not all byte slices are valid string slices, however: [`&str`] requires
264 /// that it is valid UTF-8. `from_utf8()` checks to ensure that the bytes are valid
265 /// UTF-8, and then does the conversion.
267 /// [`&str`]: ../../std/primitive.str.html
268 /// [`u8`]: ../../std/primitive.u8.html
269 /// [byteslice]: ../../std/primitive.slice.html
271 /// If you are sure that the byte slice is valid UTF-8, and you don't want to
272 /// incur the overhead of the validity check, there is an unsafe version of
273 /// this function, [`from_utf8_unchecked`][fromutf8u], which has the same
274 /// behavior but skips the check.
276 /// [fromutf8u]: fn.from_utf8_unchecked.html
278 /// If you need a `String` instead of a `&str`, consider
279 /// [`String::from_utf8`][string].
281 /// [string]: ../../std/string/struct.String.html#method.from_utf8
283 /// Because you can stack-allocate a `[u8; N]`, and you can take a
284 /// [`&[u8]`][byteslice] of it, this function is one way to have a
285 /// stack-allocated string. There is an example of this in the
286 /// examples section below.
288 /// [byteslice]: ../../std/primitive.slice.html
292 /// Returns `Err` if the slice is not UTF-8 with a description as to why the
293 /// provided slice is not UTF-8.
302 /// // some bytes, in a vector
303 /// let sparkle_heart = vec![240, 159, 146, 150];
305 /// // We know these bytes are valid, so just use `unwrap()`.
306 /// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
308 /// assert_eq!("💖", sparkle_heart);
316 /// // some invalid bytes, in a vector
317 /// let sparkle_heart = vec![0, 159, 146, 150];
319 /// assert!(str::from_utf8(&sparkle_heart).is_err());
322 /// See the docs for [`Utf8Error`][error] for more details on the kinds of
323 /// errors that can be returned.
325 /// [error]: struct.Utf8Error.html
327 /// A "stack allocated string":
332 /// // some bytes, in a stack-allocated array
333 /// let sparkle_heart = [240, 159, 146, 150];
335 /// // We know these bytes are valid, so just use `unwrap()`.
336 /// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
338 /// assert_eq!("💖", sparkle_heart);
340 #[stable(feature = "rust1", since = "1.0.0")]
341 pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
342 run_utf8_validation(v)?;
343 // SAFETY: Just ran validation.
344 Ok(unsafe { from_utf8_unchecked(v) })
347 /// Converts a mutable slice of bytes to a mutable string slice.
356 /// // "Hello, Rust!" as a mutable vector
357 /// let mut hellorust = vec![72, 101, 108, 108, 111, 44, 32, 82, 117, 115, 116, 33];
359 /// // As we know these bytes are valid, we can use `unwrap()`
360 /// let outstr = str::from_utf8_mut(&mut hellorust).unwrap();
362 /// assert_eq!("Hello, Rust!", outstr);
370 /// // Some invalid bytes in a mutable vector
371 /// let mut invalid = vec![128, 223];
373 /// assert!(str::from_utf8_mut(&mut invalid).is_err());
375 /// See the docs for [`Utf8Error`][error] for more details on the kinds of
376 /// errors that can be returned.
378 /// [error]: struct.Utf8Error.html
379 #[stable(feature = "str_mut_extras", since = "1.20.0")]
380 pub fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
381 run_utf8_validation(v)?;
382 // SAFETY: Just ran validation.
383 Ok(unsafe { from_utf8_unchecked_mut(v) })
386 /// Converts a slice of bytes to a string slice without checking
387 /// that the string contains valid UTF-8.
389 /// See the safe version, [`from_utf8`][fromutf8], for more information.
391 /// [fromutf8]: fn.from_utf8.html
395 /// This function is unsafe because it does not check that the bytes passed to
396 /// it are valid UTF-8. If this constraint is violated, undefined behavior
397 /// results, as the rest of Rust assumes that [`&str`]s are valid UTF-8.
399 /// [`&str`]: ../../std/primitive.str.html
408 /// // some bytes, in a vector
409 /// let sparkle_heart = vec![240, 159, 146, 150];
411 /// let sparkle_heart = unsafe {
412 /// str::from_utf8_unchecked(&sparkle_heart)
415 /// assert_eq!("💖", sparkle_heart);
418 #[stable(feature = "rust1", since = "1.0.0")]
419 pub unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
420 &*(v as *const [u8] as *const str)
423 /// Converts a slice of bytes to a string slice without checking
424 /// that the string contains valid UTF-8; mutable version.
426 /// See the immutable version, [`from_utf8_unchecked()`][fromutf8], for more information.
428 /// [fromutf8]: fn.from_utf8_unchecked.html
437 /// let mut heart = vec![240, 159, 146, 150];
438 /// let heart = unsafe { str::from_utf8_unchecked_mut(&mut heart) };
440 /// assert_eq!("💖", heart);
443 #[stable(feature = "str_mut_extras", since = "1.20.0")]
444 pub unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str {
445 &mut *(v as *mut [u8] as *mut str)
448 #[stable(feature = "rust1", since = "1.0.0")]
449 impl fmt::Display for Utf8Error {
450 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
451 if let Some(error_len) = self.error_len {
454 "invalid utf-8 sequence of {} bytes from index {}",
455 error_len, self.valid_up_to
458 write!(f, "incomplete utf-8 byte sequence from index {}", self.valid_up_to)
467 /// An iterator over the [`char`]s of a string slice.
469 /// [`char`]: ../../std/primitive.char.html
471 /// This struct is created by the [`chars`] method on [`str`].
472 /// See its documentation for more.
474 /// [`chars`]: ../../std/primitive.str.html#method.chars
475 /// [`str`]: ../../std/primitive.str.html
477 #[stable(feature = "rust1", since = "1.0.0")]
478 pub struct Chars<'a> {
479 iter: slice::Iter<'a, u8>,
482 /// Returns the initial codepoint accumulator for the first byte.
483 /// The first byte is special: only the bottom 5 bits are wanted for width 2,
484 /// 4 bits for width 3, and 3 bits for width 4.
486 fn utf8_first_byte(byte: u8, width: u32) -> u32 {
487 (byte & (0x7F >> width)) as u32
490 /// Returns the value of `ch` updated with continuation byte `byte`.
492 fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
493 (ch << 6) | (byte & CONT_MASK) as u32
496 /// Checks whether the byte is a UTF-8 continuation byte (i.e., starts with the
499 fn utf8_is_cont_byte(byte: u8) -> bool {
500 (byte & !CONT_MASK) == TAG_CONT_U8
504 fn unwrap_or_0(opt: Option<&u8>) -> u8 {
511 /// Reads the next code point out of a byte iterator (assuming a
512 /// UTF-8-like encoding).
513 #[unstable(feature = "str_internals", issue = "none")]
515 pub fn next_code_point<'a, I: Iterator<Item = &'a u8>>(bytes: &mut I) -> Option<u32> {
517 let x = *bytes.next()?;
519 return Some(x as u32);
522 // Multibyte case follows
523 // Decode from a byte combination out of: [[[x y] z] w]
524 // NOTE: Performance is sensitive to the exact formulation here
525 let init = utf8_first_byte(x, 2);
526 let y = unwrap_or_0(bytes.next());
527 let mut ch = utf8_acc_cont_byte(init, y);
530 // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
531 let z = unwrap_or_0(bytes.next());
532 let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
533 ch = init << 12 | y_z;
536 // use only the lower 3 bits of `init`
537 let w = unwrap_or_0(bytes.next());
538 ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
545 /// Reads the last code point out of a byte iterator (assuming a
546 /// UTF-8-like encoding).
548 fn next_code_point_reverse<'a, I>(bytes: &mut I) -> Option<u32>
550 I: DoubleEndedIterator<Item = &'a u8>,
553 let w = match *bytes.next_back()? {
554 next_byte if next_byte < 128 => return Some(next_byte as u32),
555 back_byte => back_byte,
558 // Multibyte case follows
559 // Decode from a byte combination out of: [x [y [z w]]]
561 let z = unwrap_or_0(bytes.next_back());
562 ch = utf8_first_byte(z, 2);
563 if utf8_is_cont_byte(z) {
564 let y = unwrap_or_0(bytes.next_back());
565 ch = utf8_first_byte(y, 3);
566 if utf8_is_cont_byte(y) {
567 let x = unwrap_or_0(bytes.next_back());
568 ch = utf8_first_byte(x, 4);
569 ch = utf8_acc_cont_byte(ch, y);
571 ch = utf8_acc_cont_byte(ch, z);
573 ch = utf8_acc_cont_byte(ch, w);
578 #[stable(feature = "rust1", since = "1.0.0")]
579 impl<'a> Iterator for Chars<'a> {
583 fn next(&mut self) -> Option<char> {
584 next_code_point(&mut self.iter).map(|ch| {
585 // SAFETY: `str` invariant says `ch` is a valid Unicode Scalar Value.
586 unsafe { char::from_u32_unchecked(ch) }
591 fn count(self) -> usize {
592 // length in `char` is equal to the number of non-continuation bytes
593 let bytes_len = self.iter.len();
594 let mut cont_bytes = 0;
595 for &byte in self.iter {
596 cont_bytes += utf8_is_cont_byte(byte) as usize;
598 bytes_len - cont_bytes
602 fn size_hint(&self) -> (usize, Option<usize>) {
603 let len = self.iter.len();
604 // `(len + 3)` can't overflow, because we know that the `slice::Iter`
605 // belongs to a slice in memory which has a maximum length of
606 // `isize::MAX` (that's well below `usize::MAX`).
607 ((len + 3) / 4, Some(len))
611 fn last(mut self) -> Option<char> {
612 // No need to go through the entire string.
617 #[stable(feature = "chars_debug_impl", since = "1.38.0")]
618 impl fmt::Debug for Chars<'_> {
619 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
620 write!(f, "Chars(")?;
621 f.debug_list().entries(self.clone()).finish()?;
627 #[stable(feature = "rust1", since = "1.0.0")]
628 impl<'a> DoubleEndedIterator for Chars<'a> {
630 fn next_back(&mut self) -> Option<char> {
631 next_code_point_reverse(&mut self.iter).map(|ch| {
632 // SAFETY: `str` invariant says `ch` is a valid Unicode Scalar Value.
633 unsafe { char::from_u32_unchecked(ch) }
638 #[stable(feature = "fused", since = "1.26.0")]
639 impl FusedIterator for Chars<'_> {}
642 /// Views the underlying data as a subslice of the original data.
644 /// This has the same lifetime as the original slice, and so the
645 /// iterator can continue to be used while this exists.
650 /// let mut chars = "abc".chars();
652 /// assert_eq!(chars.as_str(), "abc");
654 /// assert_eq!(chars.as_str(), "bc");
657 /// assert_eq!(chars.as_str(), "");
659 #[stable(feature = "iter_to_slice", since = "1.4.0")]
661 pub fn as_str(&self) -> &'a str {
662 // SAFETY: `Chars` is only made from a str, which guarantees the iter is valid UTF-8.
663 unsafe { from_utf8_unchecked(self.iter.as_slice()) }
667 /// An iterator over the [`char`]s of a string slice, and their positions.
669 /// [`char`]: ../../std/primitive.char.html
671 /// This struct is created by the [`char_indices`] method on [`str`].
672 /// See its documentation for more.
674 /// [`char_indices`]: ../../std/primitive.str.html#method.char_indices
675 /// [`str`]: ../../std/primitive.str.html
676 #[derive(Clone, Debug)]
677 #[stable(feature = "rust1", since = "1.0.0")]
678 pub struct CharIndices<'a> {
683 #[stable(feature = "rust1", since = "1.0.0")]
684 impl<'a> Iterator for CharIndices<'a> {
685 type Item = (usize, char);
688 fn next(&mut self) -> Option<(usize, char)> {
689 let pre_len = self.iter.iter.len();
690 match self.iter.next() {
693 let index = self.front_offset;
694 let len = self.iter.iter.len();
695 self.front_offset += pre_len - len;
702 fn count(self) -> usize {
707 fn size_hint(&self) -> (usize, Option<usize>) {
708 self.iter.size_hint()
712 fn last(mut self) -> Option<(usize, char)> {
713 // No need to go through the entire string.
718 #[stable(feature = "rust1", since = "1.0.0")]
719 impl<'a> DoubleEndedIterator for CharIndices<'a> {
721 fn next_back(&mut self) -> Option<(usize, char)> {
722 self.iter.next_back().map(|ch| {
723 let index = self.front_offset + self.iter.iter.len();
729 #[stable(feature = "fused", since = "1.26.0")]
730 impl FusedIterator for CharIndices<'_> {}
732 impl<'a> CharIndices<'a> {
733 /// Views the underlying data as a subslice of the original data.
735 /// This has the same lifetime as the original slice, and so the
736 /// iterator can continue to be used while this exists.
737 #[stable(feature = "iter_to_slice", since = "1.4.0")]
739 pub fn as_str(&self) -> &'a str {
744 /// An iterator over the bytes of a string slice.
746 /// This struct is created by the [`bytes`] method on [`str`].
747 /// See its documentation for more.
749 /// [`bytes`]: ../../std/primitive.str.html#method.bytes
750 /// [`str`]: ../../std/primitive.str.html
751 #[stable(feature = "rust1", since = "1.0.0")]
752 #[derive(Clone, Debug)]
753 pub struct Bytes<'a>(Cloned<slice::Iter<'a, u8>>);
755 #[stable(feature = "rust1", since = "1.0.0")]
756 impl Iterator for Bytes<'_> {
760 fn next(&mut self) -> Option<u8> {
765 fn size_hint(&self) -> (usize, Option<usize>) {
770 fn count(self) -> usize {
775 fn last(self) -> Option<Self::Item> {
780 fn nth(&mut self, n: usize) -> Option<Self::Item> {
785 fn all<F>(&mut self, f: F) -> bool
787 F: FnMut(Self::Item) -> bool,
793 fn any<F>(&mut self, f: F) -> bool
795 F: FnMut(Self::Item) -> bool,
801 fn find<P>(&mut self, predicate: P) -> Option<Self::Item>
803 P: FnMut(&Self::Item) -> bool,
805 self.0.find(predicate)
809 fn position<P>(&mut self, predicate: P) -> Option<usize>
811 P: FnMut(Self::Item) -> bool,
813 self.0.position(predicate)
817 fn rposition<P>(&mut self, predicate: P) -> Option<usize>
819 P: FnMut(Self::Item) -> bool,
821 self.0.rposition(predicate)
825 #[stable(feature = "rust1", since = "1.0.0")]
826 impl DoubleEndedIterator for Bytes<'_> {
828 fn next_back(&mut self) -> Option<u8> {
833 fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
838 fn rfind<P>(&mut self, predicate: P) -> Option<Self::Item>
840 P: FnMut(&Self::Item) -> bool,
842 self.0.rfind(predicate)
846 #[stable(feature = "rust1", since = "1.0.0")]
847 impl ExactSizeIterator for Bytes<'_> {
849 fn len(&self) -> usize {
854 fn is_empty(&self) -> bool {
859 #[stable(feature = "fused", since = "1.26.0")]
860 impl FusedIterator for Bytes<'_> {}
862 #[unstable(feature = "trusted_len", issue = "37572")]
863 unsafe impl TrustedLen for Bytes<'_> {}
866 unsafe impl TrustedRandomAccess for Bytes<'_> {
867 unsafe fn get_unchecked(&mut self, i: usize) -> u8 {
868 self.0.get_unchecked(i)
870 fn may_have_side_effect() -> bool {
875 /// This macro generates a Clone impl for string pattern API
876 /// wrapper types of the form X<'a, P>
877 macro_rules! derive_pattern_clone {
878 (clone $t:ident with |$s:ident| $e:expr) => {
879 impl<'a, P> Clone for $t<'a, P>
881 P: Pattern<'a, Searcher: Clone>,
883 fn clone(&self) -> Self {
891 /// This macro generates two public iterator structs
892 /// wrapping a private internal one that makes use of the `Pattern` API.
894 /// For all patterns `P: Pattern<'a>` the following items will be
895 /// generated (generics omitted):
897 /// struct $forward_iterator($internal_iterator);
898 /// struct $reverse_iterator($internal_iterator);
900 /// impl Iterator for $forward_iterator
901 /// { /* internal ends up calling Searcher::next_match() */ }
903 /// impl DoubleEndedIterator for $forward_iterator
904 /// where P::Searcher: DoubleEndedSearcher
905 /// { /* internal ends up calling Searcher::next_match_back() */ }
907 /// impl Iterator for $reverse_iterator
908 /// where P::Searcher: ReverseSearcher
909 /// { /* internal ends up calling Searcher::next_match_back() */ }
911 /// impl DoubleEndedIterator for $reverse_iterator
912 /// where P::Searcher: DoubleEndedSearcher
913 /// { /* internal ends up calling Searcher::next_match() */ }
915 /// The internal one is defined outside the macro, and has almost the same
916 /// semantics as a `DoubleEndedIterator` by delegating to `pattern::Searcher` and
917 /// `pattern::ReverseSearcher` for both forward and reverse iteration.
919 /// "Almost", because a `Searcher` and a `ReverseSearcher` for a given
920 /// `Pattern` might not return the same elements, so actually implementing
921 /// `DoubleEndedIterator` for it would be incorrect.
922 /// (See the docs in `str::pattern` for more details)
924 /// However, the internal struct still represents a single ended iterator from
925 /// either end, and depending on pattern is also a valid double ended iterator,
926 /// so the two wrapper structs implement `Iterator`
927 /// and `DoubleEndedIterator` depending on the concrete pattern type, leading
928 /// to the complex impls seen above.
929 macro_rules! generate_pattern_iterators {
933 $(#[$forward_iterator_attribute:meta])*
934 struct $forward_iterator:ident;
938 $(#[$reverse_iterator_attribute:meta])*
939 struct $reverse_iterator:ident;
941 // Stability of all generated items
943 $(#[$common_stability_attribute:meta])*
945 // Internal almost-iterator that is being delegated to
947 $internal_iterator:ident yielding ($iterty:ty);
949 // Kind of delegation - either single ended or double ended
952 $(#[$forward_iterator_attribute])*
953 $(#[$common_stability_attribute])*
954 pub struct $forward_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);
956 $(#[$common_stability_attribute])*
957 impl<'a, P> fmt::Debug for $forward_iterator<'a, P>
959 P: Pattern<'a, Searcher: fmt::Debug>,
961 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
962 f.debug_tuple(stringify!($forward_iterator))
968 $(#[$common_stability_attribute])*
969 impl<'a, P: Pattern<'a>> Iterator for $forward_iterator<'a, P> {
973 fn next(&mut self) -> Option<$iterty> {
978 $(#[$common_stability_attribute])*
979 impl<'a, P> Clone for $forward_iterator<'a, P>
981 P: Pattern<'a, Searcher: Clone>,
983 fn clone(&self) -> Self {
984 $forward_iterator(self.0.clone())
988 $(#[$reverse_iterator_attribute])*
989 $(#[$common_stability_attribute])*
990 pub struct $reverse_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);
992 $(#[$common_stability_attribute])*
993 impl<'a, P> fmt::Debug for $reverse_iterator<'a, P>
995 P: Pattern<'a, Searcher: fmt::Debug>,
997 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
998 f.debug_tuple(stringify!($reverse_iterator))
1004 $(#[$common_stability_attribute])*
1005 impl<'a, P> Iterator for $reverse_iterator<'a, P>
1007 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
1009 type Item = $iterty;
1012 fn next(&mut self) -> Option<$iterty> {
1017 $(#[$common_stability_attribute])*
1018 impl<'a, P> Clone for $reverse_iterator<'a, P>
1020 P: Pattern<'a, Searcher: Clone>,
1022 fn clone(&self) -> Self {
1023 $reverse_iterator(self.0.clone())
1027 #[stable(feature = "fused", since = "1.26.0")]
1028 impl<'a, P: Pattern<'a>> FusedIterator for $forward_iterator<'a, P> {}
1030 #[stable(feature = "fused", since = "1.26.0")]
1031 impl<'a, P> FusedIterator for $reverse_iterator<'a, P>
1033 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
1036 generate_pattern_iterators!($($t)* with $(#[$common_stability_attribute])*,
1038 $reverse_iterator, $iterty);
1041 double ended; with $(#[$common_stability_attribute:meta])*,
1042 $forward_iterator:ident,
1043 $reverse_iterator:ident, $iterty:ty
1045 $(#[$common_stability_attribute])*
1046 impl<'a, P> DoubleEndedIterator for $forward_iterator<'a, P>
1048 P: Pattern<'a, Searcher: DoubleEndedSearcher<'a>>,
1051 fn next_back(&mut self) -> Option<$iterty> {
1056 $(#[$common_stability_attribute])*
1057 impl<'a, P> DoubleEndedIterator for $reverse_iterator<'a, P>
1059 P: Pattern<'a, Searcher: DoubleEndedSearcher<'a>>,
1062 fn next_back(&mut self) -> Option<$iterty> {
1068 single ended; with $(#[$common_stability_attribute:meta])*,
1069 $forward_iterator:ident,
1070 $reverse_iterator:ident, $iterty:ty
1074 derive_pattern_clone! {
1076 with |s| SplitInternal { matcher: s.matcher.clone(), ..*s }
1079 struct SplitInternal<'a, P: Pattern<'a>> {
1082 matcher: P::Searcher,
1083 allow_trailing_empty: bool,
1087 impl<'a, P> fmt::Debug for SplitInternal<'a, P>
1089 P: Pattern<'a, Searcher: fmt::Debug>,
1091 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1092 f.debug_struct("SplitInternal")
1093 .field("start", &self.start)
1094 .field("end", &self.end)
1095 .field("matcher", &self.matcher)
1096 .field("allow_trailing_empty", &self.allow_trailing_empty)
1097 .field("finished", &self.finished)
1102 impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
1104 fn get_end(&mut self) -> Option<&'a str> {
1105 if !self.finished && (self.allow_trailing_empty || self.end - self.start > 0) {
1106 self.finished = true;
1107 // SAFETY: `self.start` and `self.end` always lie on unicode boundaries.
1109 let string = self.matcher.haystack().get_unchecked(self.start..self.end);
1118 fn next(&mut self) -> Option<&'a str> {
1123 let haystack = self.matcher.haystack();
1124 match self.matcher.next_match() {
1125 // SAFETY: `Searcher` guarantees that `a` and `b` lie on unicode boundaries.
1126 Some((a, b)) => unsafe {
1127 let elt = haystack.get_unchecked(self.start..a);
1131 None => self.get_end(),
1136 fn next_back(&mut self) -> Option<&'a str>
1138 P::Searcher: ReverseSearcher<'a>,
1144 if !self.allow_trailing_empty {
1145 self.allow_trailing_empty = true;
1146 match self.next_back() {
1147 Some(elt) if !elt.is_empty() => return Some(elt),
1156 let haystack = self.matcher.haystack();
1157 match self.matcher.next_match_back() {
1158 // SAFETY: `Searcher` guarantees that `a` and `b` lie on unicode boundaries.
1159 Some((a, b)) => unsafe {
1160 let elt = haystack.get_unchecked(b..self.end);
1164 // SAFETY: `self.start` and `self.end` always lie on unicode boundaries.
1166 self.finished = true;
1167 Some(haystack.get_unchecked(self.start..self.end))
1173 generate_pattern_iterators! {
1175 /// Created with the method [`split`].
1177 /// [`split`]: ../../std/primitive.str.html#method.split
1180 /// Created with the method [`rsplit`].
1182 /// [`rsplit`]: ../../std/primitive.str.html#method.rsplit
1185 #[stable(feature = "rust1", since = "1.0.0")]
1187 SplitInternal yielding (&'a str);
1188 delegate double ended;
1191 generate_pattern_iterators! {
1193 /// Created with the method [`split_terminator`].
1195 /// [`split_terminator`]: ../../std/primitive.str.html#method.split_terminator
1196 struct SplitTerminator;
1198 /// Created with the method [`rsplit_terminator`].
1200 /// [`rsplit_terminator`]: ../../std/primitive.str.html#method.rsplit_terminator
1201 struct RSplitTerminator;
1203 #[stable(feature = "rust1", since = "1.0.0")]
1205 SplitInternal yielding (&'a str);
1206 delegate double ended;
1209 derive_pattern_clone! {
1210 clone SplitNInternal
1211 with |s| SplitNInternal { iter: s.iter.clone(), ..*s }
1214 struct SplitNInternal<'a, P: Pattern<'a>> {
1215 iter: SplitInternal<'a, P>,
1216 /// The number of splits remaining
1220 impl<'a, P> fmt::Debug for SplitNInternal<'a, P>
1222 P: Pattern<'a, Searcher: fmt::Debug>,
1224 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1225 f.debug_struct("SplitNInternal")
1226 .field("iter", &self.iter)
1227 .field("count", &self.count)
1232 impl<'a, P: Pattern<'a>> SplitNInternal<'a, P> {
1234 fn next(&mut self) -> Option<&'a str> {
1249 fn next_back(&mut self) -> Option<&'a str>
1251 P::Searcher: ReverseSearcher<'a>,
1261 self.iter.next_back()
1267 generate_pattern_iterators! {
1269 /// Created with the method [`splitn`].
1271 /// [`splitn`]: ../../std/primitive.str.html#method.splitn
1274 /// Created with the method [`rsplitn`].
1276 /// [`rsplitn`]: ../../std/primitive.str.html#method.rsplitn
1279 #[stable(feature = "rust1", since = "1.0.0")]
1281 SplitNInternal yielding (&'a str);
1282 delegate single ended;
1285 derive_pattern_clone! {
1286 clone MatchIndicesInternal
1287 with |s| MatchIndicesInternal(s.0.clone())
1290 struct MatchIndicesInternal<'a, P: Pattern<'a>>(P::Searcher);
1292 impl<'a, P> fmt::Debug for MatchIndicesInternal<'a, P>
1294 P: Pattern<'a, Searcher: fmt::Debug>,
1296 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1297 f.debug_tuple("MatchIndicesInternal").field(&self.0).finish()
1301 impl<'a, P: Pattern<'a>> MatchIndicesInternal<'a, P> {
1303 fn next(&mut self) -> Option<(usize, &'a str)> {
1306 // SAFETY: `Searcher` guarantees that `start` and `end` lie on unicode boundaries.
1307 .map(|(start, end)| unsafe { (start, self.0.haystack().get_unchecked(start..end)) })
1311 fn next_back(&mut self) -> Option<(usize, &'a str)>
1313 P::Searcher: ReverseSearcher<'a>,
1317 // SAFETY: `Searcher` guarantees that `start` and `end` lie on unicode boundaries.
1318 .map(|(start, end)| unsafe { (start, self.0.haystack().get_unchecked(start..end)) })
1322 generate_pattern_iterators! {
1324 /// Created with the method [`match_indices`].
1326 /// [`match_indices`]: ../../std/primitive.str.html#method.match_indices
1327 struct MatchIndices;
1329 /// Created with the method [`rmatch_indices`].
1331 /// [`rmatch_indices`]: ../../std/primitive.str.html#method.rmatch_indices
1332 struct RMatchIndices;
1334 #[stable(feature = "str_match_indices", since = "1.5.0")]
1336 MatchIndicesInternal yielding ((usize, &'a str));
1337 delegate double ended;
1340 derive_pattern_clone! {
1341 clone MatchesInternal
1342 with |s| MatchesInternal(s.0.clone())
1345 struct MatchesInternal<'a, P: Pattern<'a>>(P::Searcher);
1347 impl<'a, P> fmt::Debug for MatchesInternal<'a, P>
1349 P: Pattern<'a, Searcher: fmt::Debug>,
1351 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1352 f.debug_tuple("MatchesInternal").field(&self.0).finish()
1356 impl<'a, P: Pattern<'a>> MatchesInternal<'a, P> {
1358 fn next(&mut self) -> Option<&'a str> {
1359 // SAFETY: `Searcher` guarantees that `start` and `end` lie on unicode boundaries.
1360 self.0.next_match().map(|(a, b)| unsafe {
1361 // Indices are known to be on utf8 boundaries
1362 self.0.haystack().get_unchecked(a..b)
1367 fn next_back(&mut self) -> Option<&'a str>
1369 P::Searcher: ReverseSearcher<'a>,
1371 // SAFETY: `Searcher` guarantees that `start` and `end` lie on unicode boundaries.
1372 self.0.next_match_back().map(|(a, b)| unsafe {
1373 // Indices are known to be on utf8 boundaries
1374 self.0.haystack().get_unchecked(a..b)
1379 generate_pattern_iterators! {
1381 /// Created with the method [`matches`].
1383 /// [`matches`]: ../../std/primitive.str.html#method.matches
1386 /// Created with the method [`rmatches`].
1388 /// [`rmatches`]: ../../std/primitive.str.html#method.rmatches
1391 #[stable(feature = "str_matches", since = "1.2.0")]
1393 MatchesInternal yielding (&'a str);
1394 delegate double ended;
1397 /// An iterator over the lines of a string, as string slices.
1399 /// This struct is created with the [`lines`] method on [`str`].
1400 /// See its documentation for more.
1402 /// [`lines`]: ../../std/primitive.str.html#method.lines
1403 /// [`str`]: ../../std/primitive.str.html
1404 #[stable(feature = "rust1", since = "1.0.0")]
1405 #[derive(Clone, Debug)]
1406 pub struct Lines<'a>(Map<SplitTerminator<'a, char>, LinesAnyMap>);
1408 #[stable(feature = "rust1", since = "1.0.0")]
1409 impl<'a> Iterator for Lines<'a> {
1410 type Item = &'a str;
1413 fn next(&mut self) -> Option<&'a str> {
1418 fn size_hint(&self) -> (usize, Option<usize>) {
1423 fn last(mut self) -> Option<&'a str> {
1428 #[stable(feature = "rust1", since = "1.0.0")]
1429 impl<'a> DoubleEndedIterator for Lines<'a> {
1431 fn next_back(&mut self) -> Option<&'a str> {
1436 #[stable(feature = "fused", since = "1.26.0")]
1437 impl FusedIterator for Lines<'_> {}
1439 /// Created with the method [`lines_any`].
1441 /// [`lines_any`]: ../../std/primitive.str.html#method.lines_any
1442 #[stable(feature = "rust1", since = "1.0.0")]
1443 #[rustc_deprecated(since = "1.4.0", reason = "use lines()/Lines instead now")]
1444 #[derive(Clone, Debug)]
1445 #[allow(deprecated)]
1446 pub struct LinesAny<'a>(Lines<'a>);
1449 /// A nameable, cloneable fn type
1451 struct LinesAnyMap impl<'a> Fn = |line: &'a str| -> &'a str {
1453 if l > 0 && line.as_bytes()[l - 1] == b'\r' { &line[0 .. l - 1] }
1458 #[stable(feature = "rust1", since = "1.0.0")]
1459 #[allow(deprecated)]
1460 impl<'a> Iterator for LinesAny<'a> {
1461 type Item = &'a str;
1464 fn next(&mut self) -> Option<&'a str> {
1469 fn size_hint(&self) -> (usize, Option<usize>) {
1474 #[stable(feature = "rust1", since = "1.0.0")]
1475 #[allow(deprecated)]
1476 impl<'a> DoubleEndedIterator for LinesAny<'a> {
1478 fn next_back(&mut self) -> Option<&'a str> {
1483 #[stable(feature = "fused", since = "1.26.0")]
1484 #[allow(deprecated)]
1485 impl FusedIterator for LinesAny<'_> {}
1488 Section: UTF-8 validation
// A `u64` pattern with the high bit of every byte set; the cast truncates it
// so that it fits into a `usize`.
const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;

/// Tests whether the word `x` holds at least one non-ASCII byte, i.e. a byte
/// whose value is 128 or above.
fn contains_nonascii(x: usize) -> bool {
    let high_bits = x & NONASCII_MASK;
    high_bits != 0
}
1500 /// Walks through `v` checking that it's a valid UTF-8 sequence,
1501 /// returning `Ok(())` in that case, or, if it is invalid, `Err(err)`.
1503 fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
1507 let usize_bytes = mem::size_of::<usize>();
1508 let ascii_block_size = 2 * usize_bytes;
1509 let blocks_end = if len >= ascii_block_size { len - ascii_block_size + 1 } else { 0 };
1510 let align = v.as_ptr().align_offset(usize_bytes);
1513 let old_offset = index;
1515 ($error_len: expr) => {
1516 return Err(Utf8Error { valid_up_to: old_offset, error_len: $error_len });
1523 // we needed data, but there was none: error!
1531 let first = v[index];
1533 let w = UTF8_CHAR_WIDTH[first as usize];
1534 // 2-byte encoding is for codepoints \u{0080} to \u{07ff}
1535 // first C2 80 last DF BF
1536 // 3-byte encoding is for codepoints \u{0800} to \u{ffff}
1537 // first E0 A0 80 last EF BF BF
1538 // excluding surrogates codepoints \u{d800} to \u{dfff}
1539 // ED A0 80 to ED BF BF
1540 // 4-byte encoding is for codepoints \u{10000} to \u{10ffff}
1541 // first F0 90 80 80 last F4 8F BF BF
1543 // Use the UTF-8 syntax from the RFC
1545 // https://tools.ietf.org/html/rfc3629
1547 // UTF8-2 = %xC2-DF UTF8-tail
1548 // UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
1549 // %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
1550 // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
1551 // %xF4 %x80-8F 2( UTF8-tail )
1554 if next!() & !CONT_MASK != TAG_CONT_U8 {
1559 match (first, next!()) {
1561 | (0xE1..=0xEC, 0x80..=0xBF)
1562 | (0xED, 0x80..=0x9F)
1563 | (0xEE..=0xEF, 0x80..=0xBF) => {}
1566 if next!() & !CONT_MASK != TAG_CONT_U8 {
1571 match (first, next!()) {
1572 (0xF0, 0x90..=0xBF) | (0xF1..=0xF3, 0x80..=0xBF) | (0xF4, 0x80..=0x8F) => {}
1575 if next!() & !CONT_MASK != TAG_CONT_U8 {
1578 if next!() & !CONT_MASK != TAG_CONT_U8 {
1586 // Ascii case, try to skip forward quickly.
1587 // When the pointer is aligned, read 2 words of data per iteration
1588 // until we find a word containing a non-ascii byte.
1589 if align != usize::max_value() && align.wrapping_sub(index) % usize_bytes == 0 {
1590 let ptr = v.as_ptr();
1591 while index < blocks_end {
1592 // SAFETY: since `align - index` and `ascii_block_size` are
1593 // multiples of `usize_bytes`, `block = ptr.add(index)` is
1594 // always aligned with a `usize` so it's safe to dereference
1595 // both `block` and `block.offset(1)`.
1597 let block = ptr.add(index) as *const usize;
1598 // break if there is a nonascii byte
1599 let zu = contains_nonascii(*block);
1600 let zv = contains_nonascii(*block.offset(1));
1605 index += ascii_block_size;
1607 // step from the point where the wordwise loop stopped
1608 while index < len && v[index] < 128 {
1620 // https://tools.ietf.org/html/rfc3629
1621 static UTF8_CHAR_WIDTH: [u8; 256] = [
1622 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1624 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1626 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1628 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1630 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1632 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1634 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1636 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xEF
1637 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xFF
1640 /// Given a first byte, determines how many bytes are in this UTF-8 character.
1641 #[unstable(feature = "str_internals", issue = "none")]
1643 pub fn utf8_char_width(b: u8) -> usize {
1644 UTF8_CHAR_WIDTH[b as usize] as usize
1647 /// Mask of the value bits of a continuation byte.
1648 const CONT_MASK: u8 = 0b0011_1111;
1649 /// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte.
1650 const TAG_CONT_U8: u8 = 0b1000_0000;
1653 Section: Trait implementations
1657 use crate::cmp::Ordering;
1659 use crate::slice::{self, SliceIndex};
1661 /// Implements ordering of strings.
1663 /// Strings are ordered lexicographically by their byte values. This orders Unicode code
1664 /// points based on their positions in the code charts. This is not necessarily the same as
1665 /// "alphabetical" order, which varies by language and locale. Sorting strings according to
1666 /// culturally-accepted standards requires locale-specific data that is outside the scope of
1668 #[stable(feature = "rust1", since = "1.0.0")]
1671 fn cmp(&self, other: &str) -> Ordering {
1672 self.as_bytes().cmp(other.as_bytes())
1676 #[stable(feature = "rust1", since = "1.0.0")]
1677 impl PartialEq for str {
1679 fn eq(&self, other: &str) -> bool {
1680 self.as_bytes() == other.as_bytes()
1683 fn ne(&self, other: &str) -> bool {
1688 #[stable(feature = "rust1", since = "1.0.0")]
1691 /// Implements comparison operations on strings.
1693 /// Strings are compared lexicographically by their byte values. This compares Unicode code
1694 /// points based on their positions in the code charts. This is not necessarily the same as
1695 /// "alphabetical" order, which varies by language and locale. Comparing strings according to
1696 /// culturally-accepted standards requires locale-specific data that is outside the scope of
1698 #[stable(feature = "rust1", since = "1.0.0")]
1699 impl PartialOrd for str {
1701 fn partial_cmp(&self, other: &str) -> Option<Ordering> {
1702 Some(self.cmp(other))
1706 #[stable(feature = "rust1", since = "1.0.0")]
1707 impl<I> ops::Index<I> for str
1711 type Output = I::Output;
1714 fn index(&self, index: I) -> &I::Output {
1719 #[stable(feature = "rust1", since = "1.0.0")]
1720 impl<I> ops::IndexMut<I> for str
1725 fn index_mut(&mut self, index: I) -> &mut I::Output {
1726 index.index_mut(self)
/// Panics with a fixed message; never returns.
///
/// The inclusive-range `SliceIndex<str>` impls call this from `index` and
/// `index_mut` when the range's `end` equals `usize::max_value()`, the case
/// in which the exclusive bound `end + 1` cannot be formed.
1732 fn str_index_overflow_fail() -> ! {
1733 panic!("attempted to index str up to maximum usize");
1736 /// Implements substring slicing with syntax `&self[..]` or `&mut self[..]`.
1738 /// Returns a slice of the whole string, i.e., returns `&self` or `&mut
1739 /// self`. Equivalent to `&self[0 .. len]` or `&mut self[0 .. len]`. Unlike
1740 /// other indexing operations, this can never panic.
1742 /// This operation is `O(1)`.
1744 /// Prior to 1.20.0, these indexing operations were still supported by
1745 /// direct implementation of `Index` and `IndexMut`.
1747 /// Equivalent to `&self[0 .. len]` or `&mut self[0 .. len]`.
1748 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
1749 impl SliceIndex<str> for ops::RangeFull {
1752 fn get(self, slice: &str) -> Option<&Self::Output> {
1756 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
1760 unsafe fn get_unchecked(self, slice: &str) -> &Self::Output {
1764 unsafe fn get_unchecked_mut(self, slice: &mut str) -> &mut Self::Output {
1768 fn index(self, slice: &str) -> &Self::Output {
1772 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
1777 /// Implements substring slicing with syntax `&self[begin .. end]` or `&mut
1778 /// self[begin .. end]`.
1780 /// Returns a slice of the given string from the byte range
1781 /// [`begin`, `end`).
1783 /// This operation is `O(1)`.
1785 /// Prior to 1.20.0, these indexing operations were still supported by
1786 /// direct implementation of `Index` and `IndexMut`.
1790 /// Panics if `begin` or `end` does not point to the starting byte offset of
1791 /// a character (as defined by `is_char_boundary`), if `begin > end`, or if
1797 /// let s = "Löwe 老虎 Léopard";
1798 /// assert_eq!(&s[0 .. 1], "L");
1800 /// assert_eq!(&s[1 .. 9], "öwe 老");
1802 /// // these will panic:
1803 /// // byte 2 lies within `ö`:
1806 /// // byte 8 lies within `老`
1809 /// // byte 100 is outside the string
1810 /// // &s[3 .. 100];
1812 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
1813 impl SliceIndex<str> for ops::Range<usize> {
1816 fn get(self, slice: &str) -> Option<&Self::Output> {
1817 if self.start <= self.end
1818 && slice.is_char_boundary(self.start)
1819 && slice.is_char_boundary(self.end)
1821 // SAFETY: just checked that `start` and `end` are on a char boundary.
1822 Some(unsafe { self.get_unchecked(slice) })
1828 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
1829 if self.start <= self.end
1830 && slice.is_char_boundary(self.start)
1831 && slice.is_char_boundary(self.end)
1833 // SAFETY: just checked that `start` and `end` are on a char boundary.
1834 Some(unsafe { self.get_unchecked_mut(slice) })
1840 unsafe fn get_unchecked(self, slice: &str) -> &Self::Output {
1841 let ptr = slice.as_ptr().add(self.start);
1842 let len = self.end - self.start;
1843 super::from_utf8_unchecked(slice::from_raw_parts(ptr, len))
1846 unsafe fn get_unchecked_mut(self, slice: &mut str) -> &mut Self::Output {
1847 let ptr = slice.as_mut_ptr().add(self.start);
1848 let len = self.end - self.start;
1849 super::from_utf8_unchecked_mut(slice::from_raw_parts_mut(ptr, len))
1852 fn index(self, slice: &str) -> &Self::Output {
1853 let (start, end) = (self.start, self.end);
1854 self.get(slice).unwrap_or_else(|| super::slice_error_fail(slice, start, end))
1857 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
1858 // is_char_boundary checks that the index is in [0, .len()]
1859 // cannot reuse `get` as above, because of NLL trouble
1860 if self.start <= self.end
1861 && slice.is_char_boundary(self.start)
1862 && slice.is_char_boundary(self.end)
1864 // SAFETY: just checked that `start` and `end` are on a char boundary.
1865 unsafe { self.get_unchecked_mut(slice) }
1867 super::slice_error_fail(slice, self.start, self.end)
1872 /// Implements substring slicing with syntax `&self[.. end]` or `&mut
1875 /// Returns a slice of the given string from the byte range [`0`, `end`).
1876 /// Equivalent to `&self[0 .. end]` or `&mut self[0 .. end]`.
1878 /// This operation is `O(1)`.
1880 /// Prior to 1.20.0, these indexing operations were still supported by
1881 /// direct implementation of `Index` and `IndexMut`.
1885 /// Panics if `end` does not point to the starting byte offset of a
1886 /// character (as defined by `is_char_boundary`), or if `end > len`.
1887 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
1888 impl SliceIndex<str> for ops::RangeTo<usize> {
1891 fn get(self, slice: &str) -> Option<&Self::Output> {
1892 if slice.is_char_boundary(self.end) {
1893 // SAFETY: just checked that `end` is on a char boundary.
1894 Some(unsafe { self.get_unchecked(slice) })
1900 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
1901 if slice.is_char_boundary(self.end) {
1902 // SAFETY: just checked that `end` is on a char boundary.
1903 Some(unsafe { self.get_unchecked_mut(slice) })
1909 unsafe fn get_unchecked(self, slice: &str) -> &Self::Output {
1910 let ptr = slice.as_ptr();
1911 super::from_utf8_unchecked(slice::from_raw_parts(ptr, self.end))
1914 unsafe fn get_unchecked_mut(self, slice: &mut str) -> &mut Self::Output {
1915 let ptr = slice.as_mut_ptr();
1916 super::from_utf8_unchecked_mut(slice::from_raw_parts_mut(ptr, self.end))
1919 fn index(self, slice: &str) -> &Self::Output {
1921 self.get(slice).unwrap_or_else(|| super::slice_error_fail(slice, 0, end))
1924 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
1925 if slice.is_char_boundary(self.end) {
1926 // SAFETY: just checked that `end` is on a char boundary.
1927 unsafe { self.get_unchecked_mut(slice) }
1929 super::slice_error_fail(slice, 0, self.end)
1934 /// Implements substring slicing with syntax `&self[begin ..]` or `&mut
1935 /// self[begin ..]`.
1937 /// Returns a slice of the given string from the byte range [`begin`,
1938 /// `len`). Equivalent to `&self[begin .. len]` or `&mut self[begin ..
1941 /// This operation is `O(1)`.
1943 /// Prior to 1.20.0, these indexing operations were still supported by
1944 /// direct implementation of `Index` and `IndexMut`.
1948 /// Panics if `begin` does not point to the starting byte offset of
1949 /// a character (as defined by `is_char_boundary`), or if `begin > len`.
1950 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
1951 impl SliceIndex<str> for ops::RangeFrom<usize> {
1954 fn get(self, slice: &str) -> Option<&Self::Output> {
1955 if slice.is_char_boundary(self.start) {
1956 // SAFETY: just checked that `start` is on a char boundary.
1957 Some(unsafe { self.get_unchecked(slice) })
1963 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
1964 if slice.is_char_boundary(self.start) {
1965 // SAFETY: just checked that `start` is on a char boundary.
1966 Some(unsafe { self.get_unchecked_mut(slice) })
1972 unsafe fn get_unchecked(self, slice: &str) -> &Self::Output {
1973 let ptr = slice.as_ptr().add(self.start);
1974 let len = slice.len() - self.start;
1975 super::from_utf8_unchecked(slice::from_raw_parts(ptr, len))
1978 unsafe fn get_unchecked_mut(self, slice: &mut str) -> &mut Self::Output {
1979 let ptr = slice.as_mut_ptr().add(self.start);
1980 let len = slice.len() - self.start;
1981 super::from_utf8_unchecked_mut(slice::from_raw_parts_mut(ptr, len))
1984 fn index(self, slice: &str) -> &Self::Output {
1985 let (start, end) = (self.start, slice.len());
1986 self.get(slice).unwrap_or_else(|| super::slice_error_fail(slice, start, end))
1989 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
1990 if slice.is_char_boundary(self.start) {
1991 // SAFETY: just checked that `start` is on a char boundary.
1992 unsafe { self.get_unchecked_mut(slice) }
1994 super::slice_error_fail(slice, self.start, slice.len())
1999 /// Implements substring slicing with syntax `&self[begin ..= end]` or `&mut
2000 /// self[begin ..= end]`.
2002 /// Returns a slice of the given string from the byte range
2003 /// [`begin`, `end`]. Equivalent to `&self [begin .. end + 1]` or `&mut
2004 /// self[begin .. end + 1]`, except if `end` has the maximum value for
2007 /// This operation is `O(1)`.
2011 /// Panics if `begin` does not point to the starting byte offset of
2012 /// a character (as defined by `is_char_boundary`), if `end` does not point
2013 /// to the ending byte offset of a character (`end + 1` is either a starting
2014 /// byte offset or equal to `len`), if `begin > end`, or if `end >= len`.
2015 #[stable(feature = "inclusive_range", since = "1.26.0")]
2016 impl SliceIndex<str> for ops::RangeInclusive<usize> {
2019 fn get(self, slice: &str) -> Option<&Self::Output> {
2020 if *self.end() == usize::max_value() {
2023 (*self.start()..self.end() + 1).get(slice)
2027 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
2028 if *self.end() == usize::max_value() {
2031 (*self.start()..self.end() + 1).get_mut(slice)
2035 unsafe fn get_unchecked(self, slice: &str) -> &Self::Output {
2036 (*self.start()..self.end() + 1).get_unchecked(slice)
2039 unsafe fn get_unchecked_mut(self, slice: &mut str) -> &mut Self::Output {
2040 (*self.start()..self.end() + 1).get_unchecked_mut(slice)
2043 fn index(self, slice: &str) -> &Self::Output {
2044 if *self.end() == usize::max_value() {
2045 str_index_overflow_fail();
2047 (*self.start()..self.end() + 1).index(slice)
2050 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
2051 if *self.end() == usize::max_value() {
2052 str_index_overflow_fail();
2054 (*self.start()..self.end() + 1).index_mut(slice)
2058 /// Implements substring slicing with syntax `&self[..= end]` or `&mut
2061 /// Returns a slice of the given string from the byte range [0, `end`].
2062 /// Equivalent to `&self [0 .. end + 1]`, except if `end` has the maximum
2063 /// value for `usize`.
2065 /// This operation is `O(1)`.
2069 /// Panics if `end` does not point to the ending byte offset of a character
2070 /// (`end + 1` is either a starting byte offset as defined by
2071 /// `is_char_boundary`, or equal to `len`), or if `end >= len`.
2072 #[stable(feature = "inclusive_range", since = "1.26.0")]
2073 impl SliceIndex<str> for ops::RangeToInclusive<usize> {
2076 fn get(self, slice: &str) -> Option<&Self::Output> {
2077 if self.end == usize::max_value() { None } else { (..self.end + 1).get(slice) }
2080 fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
2081 if self.end == usize::max_value() { None } else { (..self.end + 1).get_mut(slice) }
2084 unsafe fn get_unchecked(self, slice: &str) -> &Self::Output {
2085 (..self.end + 1).get_unchecked(slice)
2088 unsafe fn get_unchecked_mut(self, slice: &mut str) -> &mut Self::Output {
2089 (..self.end + 1).get_unchecked_mut(slice)
2092 fn index(self, slice: &str) -> &Self::Output {
2093 if self.end == usize::max_value() {
2094 str_index_overflow_fail();
2096 (..self.end + 1).index(slice)
2099 fn index_mut(self, slice: &mut str) -> &mut Self::Output {
2100 if self.end == usize::max_value() {
2101 str_index_overflow_fail();
2103 (..self.end + 1).index_mut(slice)
2108 // truncate `&str` to length at most equal to `max`
2109 // return `true` if it was truncated, and the new str.
2110 fn truncate_to_char_boundary(s: &str, mut max: usize) -> (bool, &str) {
2114 while !s.is_char_boundary(max) {
2123 fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
2124 const MAX_DISPLAY_LENGTH: usize = 256;
2125 let (truncated, s_trunc) = truncate_to_char_boundary(s, MAX_DISPLAY_LENGTH);
2126 let ellipsis = if truncated { "[...]" } else { "" };
2129 if begin > s.len() || end > s.len() {
2130 let oob_index = if begin > s.len() { begin } else { end };
2131 panic!("byte index {} is out of bounds of `{}`{}", oob_index, s_trunc, ellipsis);
2137 "begin <= end ({} <= {}) when slicing `{}`{}",
2144 // 3. character boundary
2145 let index = if !s.is_char_boundary(begin) { begin } else { end };
2146 // find the character
2147 let mut char_start = index;
2148 while !s.is_char_boundary(char_start) {
2151 // `char_start` must be less than len and a char boundary
2152 let ch = s[char_start..].chars().next().unwrap();
2153 let char_range = char_start..char_start + ch.len_utf8();
2155 "byte index {} is not a char boundary; it is inside {:?} (bytes {:?}) of `{}`{}",
2156 index, ch, char_range, s_trunc, ellipsis
2163 /// Returns the length of `self`.
2165 /// This length is in bytes, not [`char`]s or graphemes. In other words,
2166 /// it may not be what a human considers the length of the string.
2173 /// let len = "foo".len();
2174 /// assert_eq!(3, len);
2176 /// assert_eq!("ƒoo".len(), 4); // fancy f!
2177 /// assert_eq!("ƒoo".chars().count(), 3);
2179 #[stable(feature = "rust1", since = "1.0.0")]
2180 #[rustc_const_stable(feature = "const_str_len", since = "1.32.0")]
2182 pub const fn len(&self) -> usize {
2183 self.as_bytes().len()
2186 /// Returns `true` if `self` has a length of zero bytes.
2194 /// assert!(s.is_empty());
2196 /// let s = "not empty";
2197 /// assert!(!s.is_empty());
2200 #[stable(feature = "rust1", since = "1.0.0")]
2201 #[rustc_const_stable(feature = "const_str_is_empty", since = "1.32.0")]
2202 pub const fn is_empty(&self) -> bool {
2206 /// Checks that `index`-th byte lies at the start and/or end of a
2207 /// UTF-8 code point sequence.
2209 /// The start and end of the string (when `index == self.len()`) are
2210 /// considered to be
2213 /// Returns `false` if `index` is greater than `self.len()`.
2218 /// let s = "Löwe 老虎 Léopard";
2219 /// assert!(s.is_char_boundary(0));
2221 /// assert!(s.is_char_boundary(6));
2222 /// assert!(s.is_char_boundary(s.len()));
2224 /// // second byte of `ö`
2225 /// assert!(!s.is_char_boundary(2));
2227 /// // third byte of `老`
2228 /// assert!(!s.is_char_boundary(8));
2230 #[stable(feature = "is_char_boundary", since = "1.9.0")]
2232 pub fn is_char_boundary(&self, index: usize) -> bool {
2233 // 0 and len are always ok.
2234 // Test for 0 explicitly so that it can optimize out the check
2235 // easily and skip reading string data for that case.
2236 if index == 0 || index == self.len() {
2239 match self.as_bytes().get(index) {
2241 // This is bit magic equivalent to: b < 128 || b >= 192
2242 Some(&b) => (b as i8) >= -0x40,
2246 /// Converts a string slice to a byte slice. To convert the byte slice back
2247 /// into a string slice, use the [`str::from_utf8`] function.
2249 /// [`str::from_utf8`]: ./str/fn.from_utf8.html
2256 /// let bytes = "bors".as_bytes();
2257 /// assert_eq!(b"bors", bytes);
2259 #[stable(feature = "rust1", since = "1.0.0")]
2260 #[rustc_const_stable(feature = "str_as_bytes", since = "1.32.0")]
2262 #[allow(unused_attributes)]
2263 #[allow_internal_unstable(const_fn_union)]
2264 pub const fn as_bytes(&self) -> &[u8] {
2270 // SAFETY: const sound because we transmute two types with the same layout
2271 unsafe { Slices { str: self }.slice }
2274 /// Converts a mutable string slice to a mutable byte slice. To convert the
2275 /// mutable byte slice back into a mutable string slice, use the
2276 /// [`str::from_utf8_mut`] function.
2278 /// [`str::from_utf8_mut`]: ./str/fn.from_utf8_mut.html
2285 /// let mut s = String::from("Hello");
2286 /// let bytes = unsafe { s.as_bytes_mut() };
2288 /// assert_eq!(b"Hello", bytes);
2294 /// let mut s = String::from("🗻∈🌏");
2297 /// let bytes = s.as_bytes_mut();
2299 /// bytes[0] = 0xF0;
2300 /// bytes[1] = 0x9F;
2301 /// bytes[2] = 0x8D;
2302 /// bytes[3] = 0x94;
2305 /// assert_eq!("🍔∈🌏", s);
2307 #[stable(feature = "str_mut_extras", since = "1.20.0")]
2309 pub unsafe fn as_bytes_mut(&mut self) -> &mut [u8] {
2310 &mut *(self as *mut str as *mut [u8])
2313 /// Converts a string slice to a raw pointer.
2315 /// As string slices are a slice of bytes, the raw pointer points to a
2316 /// [`u8`]. This pointer will be pointing to the first byte of the string
2319 /// The caller must ensure that the returned pointer is never written to.
2320 /// If you need to mutate the contents of the string slice, use [`as_mut_ptr`].
2322 /// [`u8`]: primitive.u8.html
2323 /// [`as_mut_ptr`]: #method.as_mut_ptr
2330 /// let s = "Hello";
2331 /// let ptr = s.as_ptr();
2333 #[stable(feature = "rust1", since = "1.0.0")]
2334 #[rustc_const_stable(feature = "rustc_str_as_ptr", since = "1.32.0")]
2336 pub const fn as_ptr(&self) -> *const u8 {
2337 self as *const str as *const u8
2340 /// Converts a mutable string slice to a raw pointer.
2342 /// As string slices are a slice of bytes, the raw pointer points to a
2343 /// [`u8`]. This pointer will be pointing to the first byte of the string
2346 /// It is your responsibility to make sure that the string slice only gets
2347 /// modified in a way that it remains valid UTF-8.
2349 /// [`u8`]: primitive.u8.html
2350 #[stable(feature = "str_as_mut_ptr", since = "1.36.0")]
2352 pub fn as_mut_ptr(&mut self) -> *mut u8 {
2353 self as *mut str as *mut u8
2356 /// Returns a subslice of `str`.
2358 /// This is the non-panicking alternative to indexing the `str`. Returns
2359 /// [`None`] whenever the equivalent indexing operation would panic.
2361 /// [`None`]: option/enum.Option.html#variant.None
2366 /// let v = String::from("🗻∈🌏");
2368 /// assert_eq!(Some("🗻"), v.get(0..4));
2370 /// // indices not on UTF-8 sequence boundaries
2371 /// assert!(v.get(1..).is_none());
2372 /// assert!(v.get(..8).is_none());
2374 /// // out of bounds
2375 /// assert!(v.get(..42).is_none());
2377 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
2379 pub fn get<I: SliceIndex<str>>(&self, i: I) -> Option<&I::Output> {
2383 /// Returns a mutable subslice of `str`.
2385 /// This is the non-panicking alternative to indexing the `str`. Returns
2386 /// [`None`] whenever the equivalent indexing operation would panic.
2388 /// [`None`]: option/enum.Option.html#variant.None
2393 /// let mut v = String::from("hello");
2394 /// // correct length
2395 /// assert!(v.get_mut(0..5).is_some());
2396 /// // out of bounds
2397 /// assert!(v.get_mut(..42).is_none());
2398 /// assert_eq!(Some("he"), v.get_mut(0..2).map(|v| &*v));
2400 /// assert_eq!("hello", v);
2402 /// let s = v.get_mut(0..2);
2403 /// let s = s.map(|s| {
2404 /// s.make_ascii_uppercase();
2407 /// assert_eq!(Some("HE"), s);
2409 /// assert_eq!("HEllo", v);
2411 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
2413 pub fn get_mut<I: SliceIndex<str>>(&mut self, i: I) -> Option<&mut I::Output> {
2417 /// Returns an unchecked subslice of `str`.
2419 /// This is the unchecked alternative to indexing the `str`.
2423 /// Callers of this function are responsible that these preconditions are
2426 /// * The starting index must not exceed the ending index;
2427 /// * Indexes must be within bounds of the original slice;
2428 /// * Indexes must lie on UTF-8 sequence boundaries.
2430 /// Failing that, the returned string slice may reference invalid memory or
2431 /// violate the invariants communicated by the `str` type.
2438 /// assert_eq!("🗻", v.get_unchecked(0..4));
2439 /// assert_eq!("∈", v.get_unchecked(4..7));
2440 /// assert_eq!("🌏", v.get_unchecked(7..11));
2443 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
2445 pub unsafe fn get_unchecked<I: SliceIndex<str>>(&self, i: I) -> &I::Output {
2446 i.get_unchecked(self)
2449 /// Returns a mutable, unchecked subslice of `str`.
2451 /// This is the unchecked alternative to indexing the `str`.
2455 /// Callers of this function are responsible that these preconditions are
2458 /// * The starting index must not exceed the ending index;
2459 /// * Indexes must be within bounds of the original slice;
2460 /// * Indexes must lie on UTF-8 sequence boundaries.
2462 /// Failing that, the returned string slice may reference invalid memory or
2463 /// violate the invariants communicated by the `str` type.
2468 /// let mut v = String::from("🗻∈🌏");
2470 /// assert_eq!("🗻", v.get_unchecked_mut(0..4));
2471 /// assert_eq!("∈", v.get_unchecked_mut(4..7));
2472 /// assert_eq!("🌏", v.get_unchecked_mut(7..11));
2475 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
2477 pub unsafe fn get_unchecked_mut<I: SliceIndex<str>>(&mut self, i: I) -> &mut I::Output {
2478 i.get_unchecked_mut(self)
2481 /// Creates a string slice from another string slice, bypassing safety
2484 /// This is generally not recommended, use with caution! For a safe
2485 /// alternative see [`str`] and [`Index`].
2487 /// [`str`]: primitive.str.html
2488 /// [`Index`]: ops/trait.Index.html
2490 /// This new slice goes from `begin` to `end`, including `begin` but
2491 /// excluding `end`.
2493 /// To get a mutable string slice instead, see the
2494 /// [`slice_mut_unchecked`] method.
2496 /// [`slice_mut_unchecked`]: #method.slice_mut_unchecked
2500 /// Callers of this function are responsible that three preconditions are
2503 /// * `begin` must not exceed `end`.
2504 /// * `begin` and `end` must be byte positions within the string slice.
2505 /// * `begin` and `end` must lie on UTF-8 sequence boundaries.
2512 /// let s = "Löwe 老虎 Léopard";
2515 /// assert_eq!("Löwe 老虎 Léopard", s.slice_unchecked(0, 21));
2518 /// let s = "Hello, world!";
2521 /// assert_eq!("world", s.slice_unchecked(7, 12));
2524 #[stable(feature = "rust1", since = "1.0.0")]
2525 #[rustc_deprecated(since = "1.29.0", reason = "use `get_unchecked(begin..end)` instead")]
2527 pub unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str {
2528 (begin..end).get_unchecked(self)
2531 /// Creates a string slice from another string slice, bypassing safety
2533 /// This is generally not recommended, use with caution! For a safe
2534 /// alternative see [`str`] and [`IndexMut`].
2536 /// [`str`]: primitive.str.html
2537 /// [`IndexMut`]: ops/trait.IndexMut.html
2539 /// This new slice goes from `begin` to `end`, including `begin` but
2540 /// excluding `end`.
2542 /// To get an immutable string slice instead, see the
2543 /// [`slice_unchecked`] method.
2545 /// [`slice_unchecked`]: #method.slice_unchecked
2549 /// Callers of this function are responsible that three preconditions are
2552 /// * `begin` must not exceed `end`.
2553 /// * `begin` and `end` must be byte positions within the string slice.
2554 /// * `begin` and `end` must lie on UTF-8 sequence boundaries.
2555 #[stable(feature = "str_slice_mut", since = "1.5.0")]
2556 #[rustc_deprecated(since = "1.29.0", reason = "use `get_unchecked_mut(begin..end)` instead")]
2558 pub unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str {
2559 (begin..end).get_unchecked_mut(self)
2562 /// Divide one string slice into two at an index.
2564 /// The argument, `mid`, should be a byte offset from the start of the
2565 /// string. It must also be on the boundary of a UTF-8 code point.
2567 /// The two slices returned go from the start of the string slice to `mid`,
2568 /// and from `mid` to the end of the string slice.
2570 /// To get mutable string slices instead, see the [`split_at_mut`]
2573 /// [`split_at_mut`]: #method.split_at_mut
2577 /// Panics if `mid` is not on a UTF-8 code point boundary, or if it is
2578 /// beyond the last code point of the string slice.
2585 /// let s = "Per Martin-Löf";
2587 /// let (first, last) = s.split_at(3);
2589 /// assert_eq!("Per", first);
2590 /// assert_eq!(" Martin-Löf", last);
2593 #[stable(feature = "str_split_at", since = "1.4.0")]
2594 pub fn split_at(&self, mid: usize) -> (&str, &str) {
2595 // is_char_boundary checks that the index is in [0, .len()]
2596 if self.is_char_boundary(mid) {
2597 // SAFETY: just checked that `mid` is on a char boundary.
2598 unsafe { (self.get_unchecked(0..mid), self.get_unchecked(mid..self.len())) }
2600 slice_error_fail(self, 0, mid)
2604 /// Divide one mutable string slice into two at an index.
2606 /// The argument, `mid`, should be a byte offset from the start of the
2607 /// string. It must also be on the boundary of a UTF-8 code point.
2609 /// The two slices returned go from the start of the string slice to `mid`,
2610 /// and from `mid` to the end of the string slice.
2612 /// To get immutable string slices instead, see the [`split_at`] method.
2614 /// [`split_at`]: #method.split_at
2618 /// Panics if `mid` is not on a UTF-8 code point boundary, or if it is
2619 /// beyond the last code point of the string slice.
2626 /// let mut s = "Per Martin-Löf".to_string();
2628 /// let (first, last) = s.split_at_mut(3);
2629 /// first.make_ascii_uppercase();
2630 /// assert_eq!("PER", first);
2631 /// assert_eq!(" Martin-Löf", last);
2633 /// assert_eq!("PER Martin-Löf", s);
2636 #[stable(feature = "str_split_at", since = "1.4.0")]
2637 pub fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str) {
2638 // is_char_boundary checks that the index is in [0, .len()]
2639 if self.is_char_boundary(mid) {
2640 let len = self.len();
2641 let ptr = self.as_mut_ptr();
2642 // SAFETY: just checked that `mid` is on a char boundary.
2645 from_utf8_unchecked_mut(slice::from_raw_parts_mut(ptr, mid)),
2646 from_utf8_unchecked_mut(slice::from_raw_parts_mut(ptr.add(mid), len - mid)),
2650 slice_error_fail(self, 0, mid)
2654 /// Returns an iterator over the [`char`]s of a string slice.
2656 /// As a string slice consists of valid UTF-8, we can iterate through a
2657 /// string slice by [`char`]. This method returns such an iterator.
2659 /// It's important to remember that [`char`] represents a Unicode Scalar
2660 /// Value, and may not match your idea of what a 'character' is. Iteration
2661 /// over grapheme clusters may be what you actually want.
2668 /// let word = "goodbye";
2670 /// let count = word.chars().count();
2671 /// assert_eq!(7, count);
2673 /// let mut chars = word.chars();
2675 /// assert_eq!(Some('g'), chars.next());
2676 /// assert_eq!(Some('o'), chars.next());
2677 /// assert_eq!(Some('o'), chars.next());
2678 /// assert_eq!(Some('d'), chars.next());
2679 /// assert_eq!(Some('b'), chars.next());
2680 /// assert_eq!(Some('y'), chars.next());
2681 /// assert_eq!(Some('e'), chars.next());
2683 /// assert_eq!(None, chars.next());
2686 /// Remember, [`char`]s may not match your human intuition about characters:
2691 /// let mut chars = y.chars();
2693 /// assert_eq!(Some('y'), chars.next()); // not 'y̆'
2694 /// assert_eq!(Some('\u{0306}'), chars.next());
2696 /// assert_eq!(None, chars.next());
2698 #[stable(feature = "rust1", since = "1.0.0")]
2700 pub fn chars(&self) -> Chars<'_> {
2701 Chars { iter: self.as_bytes().iter() }
2704 /// Returns an iterator over the [`char`]s of a string slice, and their
2707 /// As a string slice consists of valid UTF-8, we can iterate through a
2708 /// string slice by [`char`]. This method returns an iterator of both
2709 /// these [`char`]s, as well as their byte positions.
2711 /// The iterator yields tuples. The position is first, the [`char`] is
2719 /// let word = "goodbye";
2721 /// let count = word.char_indices().count();
2722 /// assert_eq!(7, count);
2724 /// let mut char_indices = word.char_indices();
2726 /// assert_eq!(Some((0, 'g')), char_indices.next());
2727 /// assert_eq!(Some((1, 'o')), char_indices.next());
2728 /// assert_eq!(Some((2, 'o')), char_indices.next());
2729 /// assert_eq!(Some((3, 'd')), char_indices.next());
2730 /// assert_eq!(Some((4, 'b')), char_indices.next());
2731 /// assert_eq!(Some((5, 'y')), char_indices.next());
2732 /// assert_eq!(Some((6, 'e')), char_indices.next());
2734 /// assert_eq!(None, char_indices.next());
2737 /// Remember, [`char`]s may not match your human intuition about characters:
2740 /// let yes = "y̆es";
2742 /// let mut char_indices = yes.char_indices();
2744 /// assert_eq!(Some((0, 'y')), char_indices.next()); // not (0, 'y̆')
2745 /// assert_eq!(Some((1, '\u{0306}')), char_indices.next());
2747 /// // note the 3 here - the last character took up two bytes
2748 /// assert_eq!(Some((3, 'e')), char_indices.next());
2749 /// assert_eq!(Some((4, 's')), char_indices.next());
2751 /// assert_eq!(None, char_indices.next());
2753 #[stable(feature = "rust1", since = "1.0.0")]
2755 pub fn char_indices(&self) -> CharIndices<'_> {
2756 CharIndices { front_offset: 0, iter: self.chars() }
2759 /// An iterator over the bytes of a string slice.
2761 /// As a string slice consists of a sequence of bytes, we can iterate
2762 /// through a string slice by byte. This method returns such an iterator.
2769 /// let mut bytes = "bors".bytes();
2771 /// assert_eq!(Some(b'b'), bytes.next());
2772 /// assert_eq!(Some(b'o'), bytes.next());
2773 /// assert_eq!(Some(b'r'), bytes.next());
2774 /// assert_eq!(Some(b's'), bytes.next());
2776 /// assert_eq!(None, bytes.next());
2778 #[stable(feature = "rust1", since = "1.0.0")]
2780 pub fn bytes(&self) -> Bytes<'_> {
2781 Bytes(self.as_bytes().iter().cloned())
2784 /// Splits a string slice by whitespace.
2786 /// The iterator returned will return string slices that are sub-slices of
2787 /// the original string slice, separated by any amount of whitespace.
2789 /// 'Whitespace' is defined according to the terms of the Unicode Derived
2790 /// Core Property `White_Space`. If you only want to split on ASCII whitespace
2791 /// instead, use [`split_ascii_whitespace`].
2793 /// [`split_ascii_whitespace`]: #method.split_ascii_whitespace
2800 /// let mut iter = "A few words".split_whitespace();
2802 /// assert_eq!(Some("A"), iter.next());
2803 /// assert_eq!(Some("few"), iter.next());
2804 /// assert_eq!(Some("words"), iter.next());
2806 /// assert_eq!(None, iter.next());
2809 /// All kinds of whitespace are considered:
2812 /// let mut iter = " Mary had\ta\u{2009}little \n\t lamb".split_whitespace();
2813 /// assert_eq!(Some("Mary"), iter.next());
2814 /// assert_eq!(Some("had"), iter.next());
2815 /// assert_eq!(Some("a"), iter.next());
2816 /// assert_eq!(Some("little"), iter.next());
2817 /// assert_eq!(Some("lamb"), iter.next());
2819 /// assert_eq!(None, iter.next());
2821 #[stable(feature = "split_whitespace", since = "1.1.0")]
2823 pub fn split_whitespace(&self) -> SplitWhitespace<'_> {
2824 SplitWhitespace { inner: self.split(IsWhitespace).filter(IsNotEmpty) }
2827 /// Splits a string slice by ASCII whitespace.
2829 /// The iterator returned will return string slices that are sub-slices of
2830 /// the original string slice, separated by any amount of ASCII whitespace.
2832 /// To split by Unicode `Whitespace` instead, use [`split_whitespace`].
2834 /// [`split_whitespace`]: #method.split_whitespace
2841 /// let mut iter = "A few words".split_ascii_whitespace();
2843 /// assert_eq!(Some("A"), iter.next());
2844 /// assert_eq!(Some("few"), iter.next());
2845 /// assert_eq!(Some("words"), iter.next());
2847 /// assert_eq!(None, iter.next());
2850 /// All kinds of ASCII whitespace are considered:
2853 /// let mut iter = " Mary had\ta little \n\t lamb".split_ascii_whitespace();
2854 /// assert_eq!(Some("Mary"), iter.next());
2855 /// assert_eq!(Some("had"), iter.next());
2856 /// assert_eq!(Some("a"), iter.next());
2857 /// assert_eq!(Some("little"), iter.next());
2858 /// assert_eq!(Some("lamb"), iter.next());
2860 /// assert_eq!(None, iter.next());
2862 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
2864 pub fn split_ascii_whitespace(&self) -> SplitAsciiWhitespace<'_> {
2866 self.as_bytes().split(IsAsciiWhitespace).filter(BytesIsNotEmpty).map(UnsafeBytesToStr);
2867 SplitAsciiWhitespace { inner }
2870 /// An iterator over the lines of a string, as string slices.
2872 /// Lines are ended with either a newline (`\n`) or a carriage return with
2873 /// a line feed (`\r\n`).
2875 /// The final line ending is optional.
2882 /// let text = "foo\r\nbar\n\nbaz\n";
2883 /// let mut lines = text.lines();
2885 /// assert_eq!(Some("foo"), lines.next());
2886 /// assert_eq!(Some("bar"), lines.next());
2887 /// assert_eq!(Some(""), lines.next());
2888 /// assert_eq!(Some("baz"), lines.next());
2890 /// assert_eq!(None, lines.next());
2893 /// The final line ending isn't required:
2896 /// let text = "foo\nbar\n\r\nbaz";
2897 /// let mut lines = text.lines();
2899 /// assert_eq!(Some("foo"), lines.next());
2900 /// assert_eq!(Some("bar"), lines.next());
2901 /// assert_eq!(Some(""), lines.next());
2902 /// assert_eq!(Some("baz"), lines.next());
2904 /// assert_eq!(None, lines.next());
2906 #[stable(feature = "rust1", since = "1.0.0")]
2908 pub fn lines(&self) -> Lines<'_> {
2909 Lines(self.split_terminator('\n').map(LinesAnyMap))
2912 /// An iterator over the lines of a string.
2913 #[stable(feature = "rust1", since = "1.0.0")]
2914 #[rustc_deprecated(since = "1.4.0", reason = "use lines() instead now")]
2916 #[allow(deprecated)]
2917 pub fn lines_any(&self) -> LinesAny<'_> {
2918 LinesAny(self.lines())
2921 /// Returns an iterator of `u16` over the string encoded as UTF-16.
2928 /// let text = "Zażółć gęślą jaźń";
2930 /// let utf8_len = text.len();
2931 /// let utf16_len = text.encode_utf16().count();
2933 /// assert!(utf16_len <= utf8_len);
2935 #[stable(feature = "encode_utf16", since = "1.8.0")]
2936 pub fn encode_utf16(&self) -> EncodeUtf16<'_> {
2937 EncodeUtf16 { chars: self.chars(), extra: 0 }
2940 /// Returns `true` if the given pattern matches a sub-slice of
2941 /// this string slice.
2943 /// Returns `false` if it does not.
2950 /// let bananas = "bananas";
2952 /// assert!(bananas.contains("nana"));
2953 /// assert!(!bananas.contains("apples"));
2955 #[stable(feature = "rust1", since = "1.0.0")]
2957 pub fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
2958 pat.is_contained_in(self)
2961 /// Returns `true` if the given pattern matches a prefix of this
2964 /// Returns `false` if it does not.
2971 /// let bananas = "bananas";
2973 /// assert!(bananas.starts_with("bana"));
2974 /// assert!(!bananas.starts_with("nana"));
2976 #[stable(feature = "rust1", since = "1.0.0")]
2977 pub fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
2978 pat.is_prefix_of(self)
2981 /// Returns `true` if the given pattern matches a suffix of this
2984 /// Returns `false` if it does not.
2991 /// let bananas = "bananas";
2993 /// assert!(bananas.ends_with("anas"));
2994 /// assert!(!bananas.ends_with("nana"));
2996 #[stable(feature = "rust1", since = "1.0.0")]
2997 pub fn ends_with<'a, P>(&'a self, pat: P) -> bool
2999 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
3001 pat.is_suffix_of(self)
3004 /// Returns the byte index of the first character of this string slice that
3005 /// matches the pattern.
3007 /// Returns [`None`] if the pattern doesn't match.
3009 /// The pattern can be a `&str`, [`char`], or a closure that determines if
3010 /// a character matches.
3012 /// [`None`]: option/enum.Option.html#variant.None
3016 /// Simple patterns:
3019 /// let s = "Löwe 老虎 Léopard";
3021 /// assert_eq!(s.find('L'), Some(0));
3022 /// assert_eq!(s.find('é'), Some(14));
3023 /// assert_eq!(s.find("Léopard"), Some(13));
3026 /// More complex patterns using point-free style and closures:
3029 /// let s = "Löwe 老虎 Léopard";
3031 /// assert_eq!(s.find(char::is_whitespace), Some(5));
3032 /// assert_eq!(s.find(char::is_lowercase), Some(1));
3033 /// assert_eq!(s.find(|c: char| c.is_whitespace() || c.is_lowercase()), Some(1));
3034 /// assert_eq!(s.find(|c: char| (c < 'o') && (c > 'a')), Some(4));
3037 /// Not finding the pattern:
3040 /// let s = "Löwe 老虎 Léopard";
3041 /// let x: &[_] = &['1', '2'];
3043 /// assert_eq!(s.find(x), None);
3045 #[stable(feature = "rust1", since = "1.0.0")]
3047 pub fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
3048 pat.into_searcher(self).next_match().map(|(i, _)| i)
3051 /// Returns the byte index of the last character of this string slice that
3052 /// matches the pattern.
3054 /// Returns [`None`] if the pattern doesn't match.
3056 /// The pattern can be a `&str`, [`char`], or a closure that determines if
3057 /// a character matches.
3059 /// [`None`]: option/enum.Option.html#variant.None
3063 /// Simple patterns:
3066 /// let s = "Löwe 老虎 Léopard";
3068 /// assert_eq!(s.rfind('L'), Some(13));
3069 /// assert_eq!(s.rfind('é'), Some(14));
3072 /// More complex patterns with closures:
3075 /// let s = "Löwe 老虎 Léopard";
3077 /// assert_eq!(s.rfind(char::is_whitespace), Some(12));
3078 /// assert_eq!(s.rfind(char::is_lowercase), Some(20));
3081 /// Not finding the pattern:
3084 /// let s = "Löwe 老虎 Léopard";
3085 /// let x: &[_] = &['1', '2'];
3087 /// assert_eq!(s.rfind(x), None);
3089 #[stable(feature = "rust1", since = "1.0.0")]
3091 pub fn rfind<'a, P>(&'a self, pat: P) -> Option<usize>
3093 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
3095 pat.into_searcher(self).next_match_back().map(|(i, _)| i)
3098 /// An iterator over substrings of this string slice, separated by
3099 /// characters matched by a pattern.
3101 /// The pattern can be any type that implements the Pattern trait. Notable
3102 /// examples are `&str`, [`char`], and closures that determines the split.
3104 /// # Iterator behavior
3106 /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
3107 /// allows a reverse search and forward/reverse search yields the same
3108 /// elements. This is true for, e.g., [`char`], but not for `&str`.
3110 /// [`DoubleEndedIterator`]: iter/trait.DoubleEndedIterator.html
3112 /// If the pattern allows a reverse search but its results might differ
3113 /// from a forward search, the [`rsplit`] method can be used.
3115 /// [`rsplit`]: #method.rsplit
3119 /// Simple patterns:
3122 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect();
3123 /// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]);
3125 /// let v: Vec<&str> = "".split('X').collect();
3126 /// assert_eq!(v, [""]);
3128 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
3129 /// assert_eq!(v, ["lion", "", "tiger", "leopard"]);
3131 /// let v: Vec<&str> = "lion::tiger::leopard".split("::").collect();
3132 /// assert_eq!(v, ["lion", "tiger", "leopard"]);
3134 /// let v: Vec<&str> = "abc1def2ghi".split(char::is_numeric).collect();
3135 /// assert_eq!(v, ["abc", "def", "ghi"]);
3137 /// let v: Vec<&str> = "lionXtigerXleopard".split(char::is_uppercase).collect();
3138 /// assert_eq!(v, ["lion", "tiger", "leopard"]);
3141 /// A more complex pattern, using a closure:
3144 /// let v: Vec<&str> = "abc1defXghi".split(|c| c == '1' || c == 'X').collect();
3145 /// assert_eq!(v, ["abc", "def", "ghi"]);
3148 /// If a string contains multiple contiguous separators, you will end up
3149 /// with empty strings in the output:
3152 /// let x = "||||a||b|c".to_string();
3153 /// let d: Vec<_> = x.split('|').collect();
3155 /// assert_eq!(d, &["", "", "", "", "a", "", "b", "c"]);
3158 /// Contiguous separators are separated by the empty string.
3161 /// let x = "(///)".to_string();
3162 /// let d: Vec<_> = x.split('/').collect();
3164 /// assert_eq!(d, &["(", "", "", ")"]);
3167 /// Separators at the start or end of a string are neighbored
3168 /// by empty strings.
3171 /// let d: Vec<_> = "010".split("0").collect();
3172 /// assert_eq!(d, &["", "1", ""]);
3175 /// When the empty string is used as a separator, it separates
3176 /// every character in the string, along with the beginning
3177 /// and end of the string.
3180 /// let f: Vec<_> = "rust".split("").collect();
3181 /// assert_eq!(f, &["", "r", "u", "s", "t", ""]);
3184 /// Contiguous separators can lead to possibly surprising behavior
3185 /// when whitespace is used as the separator. This code is correct:
3188 /// let x = " a b c".to_string();
3189 /// let d: Vec<_> = x.split(' ').collect();
3191 /// assert_eq!(d, &["", "", "", "", "a", "", "b", "c"]);
3194 /// It does _not_ give you:
3197 /// assert_eq!(d, &["a", "b", "c"]);
3200 /// Use [`split_whitespace`] for this behavior.
3202 /// [`split_whitespace`]: #method.split_whitespace
3203 #[stable(feature = "rust1", since = "1.0.0")]
3205 pub fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> {
3206 Split(SplitInternal {
3209 matcher: pat.into_searcher(self),
3210 allow_trailing_empty: true,
3215 /// An iterator over substrings of the given string slice, separated by
3216 /// characters matched by a pattern and yielded in reverse order.
3218 /// The pattern can be any type that implements the Pattern trait. Notable
3219 /// examples are `&str`, [`char`], and closures that determines the split.
3221 /// # Iterator behavior
3223 /// The returned iterator requires that the pattern supports a reverse
3224 /// search, and it will be a [`DoubleEndedIterator`] if a forward/reverse
3225 /// search yields the same elements.
3227 /// [`DoubleEndedIterator`]: iter/trait.DoubleEndedIterator.html
3229 /// For iterating from the front, the [`split`] method can be used.
3231 /// [`split`]: #method.split
3235 /// Simple patterns:
3238 /// let v: Vec<&str> = "Mary had a little lamb".rsplit(' ').collect();
3239 /// assert_eq!(v, ["lamb", "little", "a", "had", "Mary"]);
3241 /// let v: Vec<&str> = "".rsplit('X').collect();
3242 /// assert_eq!(v, [""]);
3244 /// let v: Vec<&str> = "lionXXtigerXleopard".rsplit('X').collect();
3245 /// assert_eq!(v, ["leopard", "tiger", "", "lion"]);
3247 /// let v: Vec<&str> = "lion::tiger::leopard".rsplit("::").collect();
3248 /// assert_eq!(v, ["leopard", "tiger", "lion"]);
3251 /// A more complex pattern, using a closure:
3254 /// let v: Vec<&str> = "abc1defXghi".rsplit(|c| c == '1' || c == 'X').collect();
3255 /// assert_eq!(v, ["ghi", "def", "abc"]);
3257 #[stable(feature = "rust1", since = "1.0.0")]
3259 pub fn rsplit<'a, P>(&'a self, pat: P) -> RSplit<'a, P>
3261 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
3263 RSplit(self.split(pat).0)
3266 /// An iterator over substrings of the given string slice, separated by
3267 /// characters matched by a pattern.
3269 /// The pattern can be any type that implements the Pattern trait. Notable
3270 /// examples are `&str`, [`char`], and closures that determines the split.
3272 /// Equivalent to [`split`], except that the trailing substring
3273 /// is skipped if empty.
3275 /// [`split`]: #method.split
3277 /// This method can be used for string data that is _terminated_,
3278 /// rather than _separated_ by a pattern.
3280 /// # Iterator behavior
3282 /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
3283 /// allows a reverse search and forward/reverse search yields the same
3284 /// elements. This is true for, e.g., [`char`], but not for `&str`.
3286 /// [`DoubleEndedIterator`]: iter/trait.DoubleEndedIterator.html
3288 /// If the pattern allows a reverse search but its results might differ
3289 /// from a forward search, the [`rsplit_terminator`] method can be used.
3291 /// [`rsplit_terminator`]: #method.rsplit_terminator
3298 /// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
3299 /// assert_eq!(v, ["A", "B"]);
3301 /// let v: Vec<&str> = "A..B..".split_terminator(".").collect();
3302 /// assert_eq!(v, ["A", "", "B", ""]);
3304 #[stable(feature = "rust1", since = "1.0.0")]
3306 pub fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> {
3307 SplitTerminator(SplitInternal { allow_trailing_empty: false, ..self.split(pat).0 })
3310 /// An iterator over substrings of `self`, separated by characters
3311 /// matched by a pattern and yielded in reverse order.
3313 /// The pattern can be any type that implements the Pattern trait. Notable
3314 /// examples are `&str`, [`char`], and closures that determines the split.
3315 /// Additional libraries might provide more complex patterns like
3316 /// regular expressions.
3318 /// Equivalent to [`split`], except that the trailing substring is
3319 /// skipped if empty.
3321 /// [`split`]: #method.split
3323 /// This method can be used for string data that is _terminated_,
3324 /// rather than _separated_ by a pattern.
3326 /// # Iterator behavior
3328 /// The returned iterator requires that the pattern supports a
3329 /// reverse search, and it will be double ended if a forward/reverse
3330 /// search yields the same elements.
3332 /// For iterating from the front, the [`split_terminator`] method can be
3335 /// [`split_terminator`]: #method.split_terminator
3340 /// let v: Vec<&str> = "A.B.".rsplit_terminator('.').collect();
3341 /// assert_eq!(v, ["B", "A"]);
3343 /// let v: Vec<&str> = "A..B..".rsplit_terminator(".").collect();
3344 /// assert_eq!(v, ["", "B", "", "A"]);
3346 #[stable(feature = "rust1", since = "1.0.0")]
3348 pub fn rsplit_terminator<'a, P>(&'a self, pat: P) -> RSplitTerminator<'a, P>
3350 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
3352 RSplitTerminator(self.split_terminator(pat).0)
3355 /// An iterator over substrings of the given string slice, separated by a
3356 /// pattern, restricted to returning at most `n` items.
3358 /// If `n` substrings are returned, the last substring (the `n`th substring)
3359 /// will contain the remainder of the string.
3361 /// The pattern can be any type that implements the Pattern trait. Notable
3362 /// examples are `&str`, [`char`], and closures that determines the split.
3364 /// # Iterator behavior
3366 /// The returned iterator will not be double ended, because it is
3367 /// not efficient to support.
3369 /// If the pattern allows a reverse search, the [`rsplitn`] method can be
3372 /// [`rsplitn`]: #method.rsplitn
3376 /// Simple patterns:
3379 /// let v: Vec<&str> = "Mary had a little lambda".splitn(3, ' ').collect();
3380 /// assert_eq!(v, ["Mary", "had", "a little lambda"]);
3382 /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(3, "X").collect();
3383 /// assert_eq!(v, ["lion", "", "tigerXleopard"]);
3385 /// let v: Vec<&str> = "abcXdef".splitn(1, 'X').collect();
3386 /// assert_eq!(v, ["abcXdef"]);
3388 /// let v: Vec<&str> = "".splitn(1, 'X').collect();
3389 /// assert_eq!(v, [""]);
3392 /// A more complex pattern, using a closure:
3395 /// let v: Vec<&str> = "abc1defXghi".splitn(2, |c| c == '1' || c == 'X').collect();
3396 /// assert_eq!(v, ["abc", "defXghi"]);
3398 #[stable(feature = "rust1", since = "1.0.0")]
3400 pub fn splitn<'a, P: Pattern<'a>>(&'a self, n: usize, pat: P) -> SplitN<'a, P> {
3401 SplitN(SplitNInternal { iter: self.split(pat).0, count: n })
3404 /// An iterator over substrings of this string slice, separated by a
3405 /// pattern, starting from the end of the string, restricted to returning
3406 /// at most `n` items.
3408 /// If `n` substrings are returned, the last substring (the `n`th substring)
3409 /// will contain the remainder of the string.
3411 /// The pattern can be any type that implements the Pattern trait. Notable
3412 /// examples are `&str`, [`char`], and closures that determines the split.
3414 /// # Iterator behavior
3416 /// The returned iterator will not be double ended, because it is not
3417 /// efficient to support.
3419 /// For splitting from the front, the [`splitn`] method can be used.
3421 /// [`splitn`]: #method.splitn
3425 /// Simple patterns:
3428 /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(3, ' ').collect();
3429 /// assert_eq!(v, ["lamb", "little", "Mary had a"]);
3431 /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(3, 'X').collect();
3432 /// assert_eq!(v, ["leopard", "tiger", "lionX"]);
3434 /// let v: Vec<&str> = "lion::tiger::leopard".rsplitn(2, "::").collect();
3435 /// assert_eq!(v, ["leopard", "lion::tiger"]);
3438 /// A more complex pattern, using a closure:
3441 /// let v: Vec<&str> = "abc1defXghi".rsplitn(2, |c| c == '1' || c == 'X').collect();
3442 /// assert_eq!(v, ["ghi", "abc1def"]);
3444 #[stable(feature = "rust1", since = "1.0.0")]
3446 pub fn rsplitn<'a, P>(&'a self, n: usize, pat: P) -> RSplitN<'a, P>
3448 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
3450 RSplitN(self.splitn(n, pat).0)
3453 /// An iterator over the disjoint matches of a pattern within the given string
3456 /// The pattern can be a `&str`, [`char`], or a closure that determines if
3457 /// a character matches.
3459 /// # Iterator behavior
3461 /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
3462 /// allows a reverse search and forward/reverse search yields the same
3463 /// elements. This is true for, e.g., [`char`], but not for `&str`.
3465 /// [`DoubleEndedIterator`]: iter/trait.DoubleEndedIterator.html
3467 /// If the pattern allows a reverse search but its results might differ
3468 /// from a forward search, the [`rmatches`] method can be used.
3470 /// [`rmatches`]: #method.rmatches
3477 /// let v: Vec<&str> = "abcXXXabcYYYabc".matches("abc").collect();
3478 /// assert_eq!(v, ["abc", "abc", "abc"]);
3480 /// let v: Vec<&str> = "1abc2abc3".matches(char::is_numeric).collect();
3481 /// assert_eq!(v, ["1", "2", "3"]);
3483 #[stable(feature = "str_matches", since = "1.2.0")]
3485 pub fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> {
3486 Matches(MatchesInternal(pat.into_searcher(self)))
3489 /// An iterator over the disjoint matches of a pattern within this string slice,
3490 /// yielded in reverse order.
3492 /// The pattern can be a `&str`, [`char`], or a closure that determines if
3493 /// a character matches.
3495 /// # Iterator behavior
3497 /// The returned iterator requires that the pattern supports a reverse
3498 /// search, and it will be a [`DoubleEndedIterator`] if a forward/reverse
3499 /// search yields the same elements.
3501 /// [`DoubleEndedIterator`]: iter/trait.DoubleEndedIterator.html
3503 /// For iterating from the front, the [`matches`] method can be used.
3505 /// [`matches`]: #method.matches
3512 /// let v: Vec<&str> = "abcXXXabcYYYabc".rmatches("abc").collect();
3513 /// assert_eq!(v, ["abc", "abc", "abc"]);
3515 /// let v: Vec<&str> = "1abc2abc3".rmatches(char::is_numeric).collect();
3516 /// assert_eq!(v, ["3", "2", "1"]);
3518 #[stable(feature = "str_matches", since = "1.2.0")]
3520 pub fn rmatches<'a, P>(&'a self, pat: P) -> RMatches<'a, P>
3522 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
3524 RMatches(self.matches(pat).0)
3527 /// An iterator over the disjoint matches of a pattern within this string
3528 /// slice as well as the index that the match starts at.
3530 /// For matches of `pat` within `self` that overlap, only the indices
3531 /// corresponding to the first match are returned.
3533 /// The pattern can be a `&str`, [`char`], or a closure that determines
3534 /// if a character matches.
3536 /// # Iterator behavior
3538 /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
3539 /// allows a reverse search and forward/reverse search yields the same
3540 /// elements. This is true for, e.g., [`char`], but not for `&str`.
3542 /// [`DoubleEndedIterator`]: iter/trait.DoubleEndedIterator.html
3544 /// If the pattern allows a reverse search but its results might differ
3545 /// from a forward search, the [`rmatch_indices`] method can be used.
3547 /// [`rmatch_indices`]: #method.rmatch_indices
3554 /// let v: Vec<_> = "abcXXXabcYYYabc".match_indices("abc").collect();
3555 /// assert_eq!(v, [(0, "abc"), (6, "abc"), (12, "abc")]);
3557 /// let v: Vec<_> = "1abcabc2".match_indices("abc").collect();
3558 /// assert_eq!(v, [(1, "abc"), (4, "abc")]);
3560 /// let v: Vec<_> = "ababa".match_indices("aba").collect();
3561 /// assert_eq!(v, [(0, "aba")]); // only the first `aba`
3563 #[stable(feature = "str_match_indices", since = "1.5.0")]
3565 pub fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> {
3566 MatchIndices(MatchIndicesInternal(pat.into_searcher(self)))
3569 /// An iterator over the disjoint matches of a pattern within `self`,
3570 /// yielded in reverse order along with the index of the match.
3572 /// For matches of `pat` within `self` that overlap, only the indices
3573 /// corresponding to the last match are returned.
3575 /// The pattern can be a `&str`, [`char`], or a closure that determines if a
3576 /// character matches.
3578 /// # Iterator behavior
3580 /// The returned iterator requires that the pattern supports a reverse
3581 /// search, and it will be a [`DoubleEndedIterator`] if a forward/reverse
3582 /// search yields the same elements.
3584 /// [`DoubleEndedIterator`]: iter/trait.DoubleEndedIterator.html
3586 /// For iterating from the front, the [`match_indices`] method can be used.
3588 /// [`match_indices`]: #method.match_indices
3595 /// let v: Vec<_> = "abcXXXabcYYYabc".rmatch_indices("abc").collect();
3596 /// assert_eq!(v, [(12, "abc"), (6, "abc"), (0, "abc")]);
3598 /// let v: Vec<_> = "1abcabc2".rmatch_indices("abc").collect();
3599 /// assert_eq!(v, [(4, "abc"), (1, "abc")]);
3601 /// let v: Vec<_> = "ababa".rmatch_indices("aba").collect();
3602 /// assert_eq!(v, [(2, "aba")]); // only the last `aba`
3604 #[stable(feature = "str_match_indices", since = "1.5.0")]
3606 pub fn rmatch_indices<'a, P>(&'a self, pat: P) -> RMatchIndices<'a, P>
3608 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
3610 RMatchIndices(self.match_indices(pat).0)
3613 /// Returns a string slice with leading and trailing whitespace removed.
3615 /// 'Whitespace' is defined according to the terms of the Unicode Derived
3616 /// Core Property `White_Space`.
3623 /// let s = " Hello\tworld\t";
3625 /// assert_eq!("Hello\tworld", s.trim());
3627 #[must_use = "this returns the trimmed string as a slice, \
3628 without modifying the original"]
3629 #[stable(feature = "rust1", since = "1.0.0")]
3630 pub fn trim(&self) -> &str {
3631 self.trim_matches(|c: char| c.is_whitespace())
3634 /// Returns a string slice with leading whitespace removed.
3636 /// 'Whitespace' is defined according to the terms of the Unicode Derived
3637 /// Core Property `White_Space`.
3639 /// # Text directionality
3641 /// A string is a sequence of bytes. `start` in this context means the first
3642 /// position of that byte string; for a left-to-right language like English or
3643 /// Russian, this will be left side, and for right-to-left languages like
3644 /// Arabic or Hebrew, this will be the right side.
3651 /// let s = " Hello\tworld\t";
3652 /// assert_eq!("Hello\tworld\t", s.trim_start());
3658 /// let s = " English ";
3659 /// assert!(Some('E') == s.trim_start().chars().next());
3661 /// let s = " עברית ";
3662 /// assert!(Some('ע') == s.trim_start().chars().next());
3664 #[must_use = "this returns the trimmed string as a new slice, \
3665 without modifying the original"]
3666 #[stable(feature = "trim_direction", since = "1.30.0")]
3667 pub fn trim_start(&self) -> &str {
3668 self.trim_start_matches(|c: char| c.is_whitespace())
3671 /// Returns a string slice with trailing whitespace removed.
3673 /// 'Whitespace' is defined according to the terms of the Unicode Derived
3674 /// Core Property `White_Space`.
3676 /// # Text directionality
3678 /// A string is a sequence of bytes. `end` in this context means the last
3679 /// position of that byte string; for a left-to-right language like English or
3680 /// Russian, this will be right side, and for right-to-left languages like
3681 /// Arabic or Hebrew, this will be the left side.
3688 /// let s = " Hello\tworld\t";
3689 /// assert_eq!(" Hello\tworld", s.trim_end());
3695 /// let s = " English ";
3696 /// assert!(Some('h') == s.trim_end().chars().rev().next());
3698 /// let s = " עברית ";
3699 /// assert!(Some('ת') == s.trim_end().chars().rev().next());
3701 #[must_use = "this returns the trimmed string as a new slice, \
3702 without modifying the original"]
3703 #[stable(feature = "trim_direction", since = "1.30.0")]
3704 pub fn trim_end(&self) -> &str {
3705 self.trim_end_matches(|c: char| c.is_whitespace())
3708 /// Returns a string slice with leading whitespace removed.
3710 /// 'Whitespace' is defined according to the terms of the Unicode Derived
3711 /// Core Property `White_Space`.
3713 /// # Text directionality
3715 /// A string is a sequence of bytes. 'Left' in this context means the first
3716 /// position of that byte string; for a language like Arabic or Hebrew
3717 /// which are 'right to left' rather than 'left to right', this will be
3718 /// the _right_ side, not the left.
3725 /// let s = " Hello\tworld\t";
3727 /// assert_eq!("Hello\tworld\t", s.trim_left());
3733 /// let s = " English";
3734 /// assert!(Some('E') == s.trim_left().chars().next());
3736 /// let s = " עברית";
3737 /// assert!(Some('ע') == s.trim_left().chars().next());
3739 #[stable(feature = "rust1", since = "1.0.0")]
3742 reason = "superseded by `trim_start`",
3743 suggestion = "trim_start"
3745 pub fn trim_left(&self) -> &str {
3749 /// Returns a string slice with trailing whitespace removed.
3751 /// 'Whitespace' is defined according to the terms of the Unicode Derived
3752 /// Core Property `White_Space`.
3754 /// # Text directionality
3756 /// A string is a sequence of bytes. 'Right' in this context means the last
3757 /// position of that byte string; for a language like Arabic or Hebrew
3758 /// which are 'right to left' rather than 'left to right', this will be
3759 /// the _left_ side, not the right.
3766 /// let s = " Hello\tworld\t";
3768 /// assert_eq!(" Hello\tworld", s.trim_right());
3774 /// let s = "English ";
3775 /// assert!(Some('h') == s.trim_right().chars().rev().next());
3777 /// let s = "עברית ";
3778 /// assert!(Some('ת') == s.trim_right().chars().rev().next());
3780 #[stable(feature = "rust1", since = "1.0.0")]
3783 reason = "superseded by `trim_end`",
3784 suggestion = "trim_end"
3786 pub fn trim_right(&self) -> &str {
3790 /// Returns a string slice with all prefixes and suffixes that match a
3791 /// pattern repeatedly removed.
3793 /// The pattern can be a [`char`] or a closure that determines if a
3794 /// character matches.
3798 /// Simple patterns:
3801 /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
3802 /// assert_eq!("123foo1bar123".trim_matches(char::is_numeric), "foo1bar");
3804 /// let x: &[_] = &['1', '2'];
3805 /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar");
3808 /// A more complex pattern, using a closure:
3811 /// assert_eq!("1foo1barXX".trim_matches(|c| c == '1' || c == 'X'), "foo1bar");
3813 #[must_use = "this returns the trimmed string as a new slice, \
3814 without modifying the original"]
3815 #[stable(feature = "rust1", since = "1.0.0")]
3816 pub fn trim_matches<'a, P>(&'a self, pat: P) -> &'a str
3818 P: Pattern<'a, Searcher: DoubleEndedSearcher<'a>>,
3822 let mut matcher = pat.into_searcher(self);
3823 if let Some((a, b)) = matcher.next_reject() {
3825 j = b; // Remember earliest known match, correct it below if
3826 // last match is different
3828 if let Some((_, b)) = matcher.next_reject_back() {
3831 // SAFETY: `Searcher` is known to return valid indices.
3833 self.get_unchecked(i..j)
3837 /// Returns a string slice with all prefixes that match a pattern
3838 /// repeatedly removed.
3840 /// The pattern can be a `&str`, [`char`], or a closure that determines if
3841 /// a character matches.
3843 /// # Text directionality
3845 /// A string is a sequence of bytes. `start` in this context means the first
3846 /// position of that byte string; for a left-to-right language like English or
3847 /// Russian, this will be the left side, and for right-to-left languages like
3848 /// Arabic or Hebrew, this will be the right side.
3855 /// assert_eq!("11foo1bar11".trim_start_matches('1'), "foo1bar11");
3856 /// assert_eq!("123foo1bar123".trim_start_matches(char::is_numeric), "foo1bar123");
3858 /// let x: &[_] = &['1', '2'];
3859 /// assert_eq!("12foo1bar12".trim_start_matches(x), "foo1bar12");
3861 #[must_use = "this returns the trimmed string as a new slice, \
3862 without modifying the original"]
3863 #[stable(feature = "trim_direction", since = "1.30.0")]
3864 pub fn trim_start_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
3865 let mut i = self.len();
3866 let mut matcher = pat.into_searcher(self);
3867 if let Some((a, _)) = matcher.next_reject() {
3870 // SAFETY: `Searcher` is known to return valid indices.
3872 self.get_unchecked(i..self.len())
3876 /// Returns a string slice with the prefix removed.
3878 /// If the string starts with the pattern `prefix`, `Some` is returned with the substring where
3879 /// the prefix is removed. Unlike `trim_start_matches`, this method removes the prefix exactly
3882 /// If the string does not start with `prefix`, `None` is returned.
3887 /// #![feature(str_strip)]
3889 /// assert_eq!("foobar".strip_prefix("foo"), Some("bar"));
3890 /// assert_eq!("foobar".strip_prefix("bar"), None);
3891 /// assert_eq!("foofoo".strip_prefix("foo"), Some("foo"));
3893 #[must_use = "this returns the remaining substring as a new slice, \
3894 without modifying the original"]
3895 #[unstable(feature = "str_strip", reason = "newly added", issue = "67302")]
3896 pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a str> {
3897 let mut matcher = prefix.into_searcher(self);
3898 if let SearchStep::Match(start, len) = matcher.next() {
3901 "The first search step from Searcher \
3902 must include the first character"
3904 // SAFETY: `Searcher` is known to return valid indices.
3906 Some(self.get_unchecked(len..))
3913 /// Returns a string slice with the suffix removed.
3915 /// If the string ends with the pattern `suffix`, `Some` is returned with the substring where
3916 /// the suffix is removed. Unlike `trim_end_matches`, this method removes the suffix exactly
3919 /// If the string does not end with `suffix`, `None` is returned.
3924 /// #![feature(str_strip)]
3925 /// assert_eq!("barfoo".strip_suffix("foo"), Some("bar"));
3926 /// assert_eq!("barfoo".strip_suffix("bar"), None);
3927 /// assert_eq!("foofoo".strip_suffix("foo"), Some("foo"));
3929 #[must_use = "this returns the remaining substring as a new slice, \
3930 without modifying the original"]
3931 #[unstable(feature = "str_strip", reason = "newly added", issue = "67302")]
3932 pub fn strip_suffix<'a, P>(&'a self, suffix: P) -> Option<&'a str>
3935 <P as Pattern<'a>>::Searcher: ReverseSearcher<'a>,
3937 let mut matcher = suffix.into_searcher(self);
3938 if let SearchStep::Match(start, end) = matcher.next_back() {
3942 "The first search step from ReverseSearcher \
3943 must include the last character"
3945 // SAFETY: `Searcher` is known to return valid indices.
3947 Some(self.get_unchecked(..start))
3954 /// Returns a string slice with all suffixes that match a pattern
3955 /// repeatedly removed.
3957 /// The pattern can be a `&str`, [`char`], or a closure that
3958 /// determines if a character matches.
3960 /// # Text directionality
3962 /// A string is a sequence of bytes. `end` in this context means the last
3963 /// position of that byte string; for a left-to-right language like English or
3964 /// Russian, this will be the right side, and for right-to-left languages like
3965 /// Arabic or Hebrew, this will be the left side.
3969 /// Simple patterns:
3972 /// assert_eq!("11foo1bar11".trim_end_matches('1'), "11foo1bar");
3973 /// assert_eq!("123foo1bar123".trim_end_matches(char::is_numeric), "123foo1bar");
3975 /// let x: &[_] = &['1', '2'];
3976 /// assert_eq!("12foo1bar12".trim_end_matches(x), "12foo1bar");
3979 /// A more complex pattern, using a closure:
3982 /// assert_eq!("1fooX".trim_end_matches(|c| c == '1' || c == 'X'), "1foo");
3984 #[must_use = "this returns the trimmed string as a new slice, \
3985 without modifying the original"]
3986 #[stable(feature = "trim_direction", since = "1.30.0")]
3987 pub fn trim_end_matches<'a, P>(&'a self, pat: P) -> &'a str
3989 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
3992 let mut matcher = pat.into_searcher(self);
3993 if let Some((_, b)) = matcher.next_reject_back() {
3996 // SAFETY: `Searcher` is known to return valid indices.
3998 self.get_unchecked(0..j)
4002 /// Returns a string slice with all prefixes that match a pattern
4003 /// repeatedly removed.
4005 /// The pattern can be a `&str`, [`char`], or a closure that determines if
4006 /// a character matches.
4008 /// [`char`]: primitive.char.html
4010 /// # Text directionality
4012 /// A string is a sequence of bytes. 'Left' in this context means the first
4013 /// position of that byte string; for a language like Arabic or Hebrew
4014 /// which are 'right to left' rather than 'left to right', this will be
4015 /// the _right_ side, not the left.
4022 /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
4023 /// assert_eq!("123foo1bar123".trim_left_matches(char::is_numeric), "foo1bar123");
4025 /// let x: &[_] = &['1', '2'];
4026 /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12");
4028 #[stable(feature = "rust1", since = "1.0.0")]
4031 reason = "superseded by `trim_start_matches`",
4032 suggestion = "trim_start_matches"
4034 pub fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
4035 self.trim_start_matches(pat)
4038 /// Returns a string slice with all suffixes that match a pattern
4039 /// repeatedly removed.
4041 /// The pattern can be a `&str`, [`char`], or a closure that
4042 /// determines if a character matches.
4044 /// [`char`]: primitive.char.html
4046 /// # Text directionality
4048 /// A string is a sequence of bytes. 'Right' in this context means the last
4049 /// position of that byte string; for a language like Arabic or Hebrew
4050 /// which are 'right to left' rather than 'left to right', this will be
4051 /// the _left_ side, not the right.
4055 /// Simple patterns:
4058 /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
4059 /// assert_eq!("123foo1bar123".trim_right_matches(char::is_numeric), "123foo1bar");
4061 /// let x: &[_] = &['1', '2'];
4062 /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar");
4065 /// A more complex pattern, using a closure:
4068 /// assert_eq!("1fooX".trim_right_matches(|c| c == '1' || c == 'X'), "1foo");
4070 #[stable(feature = "rust1", since = "1.0.0")]
4073 reason = "superseded by `trim_end_matches`",
4074 suggestion = "trim_end_matches"
4076 pub fn trim_right_matches<'a, P>(&'a self, pat: P) -> &'a str
4078 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
4080 self.trim_end_matches(pat)
4083 /// Parses this string slice into another type.
4085 /// Because `parse` is so general, it can cause problems with type
4086 /// inference. As such, `parse` is one of the few times you'll see
4087 /// the syntax affectionately known as the 'turbofish': `::<>`. This
4088 /// helps the inference algorithm understand specifically which type
4089 /// you're trying to parse into.
4091 /// `parse` can parse any type that implements the [`FromStr`] trait.
4093 /// [`FromStr`]: str/trait.FromStr.html
4097 /// Will return [`Err`] if it's not possible to parse this string slice into
4098 /// the desired type.
4100 /// [`Err`]: str/trait.FromStr.html#associatedtype.Err
4107 /// let four: u32 = "4".parse().unwrap();
4109 /// assert_eq!(4, four);
4112 /// Using the 'turbofish' instead of annotating `four`:
4115 /// let four = "4".parse::<u32>();
4117 /// assert_eq!(Ok(4), four);
4120 /// Failing to parse:
4123 /// let nope = "j".parse::<u32>();
4125 /// assert!(nope.is_err());
4128 #[stable(feature = "rust1", since = "1.0.0")]
4129 pub fn parse<F: FromStr>(&self) -> Result<F, F::Err> {
4130 FromStr::from_str(self)
4133 /// Checks if all characters in this string are within the ASCII range.
4138 /// let ascii = "hello!\n";
4139 /// let non_ascii = "Grüße, Jürgen ❤";
4141 /// assert!(ascii.is_ascii());
4142 /// assert!(!non_ascii.is_ascii());
4144 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
4146 pub fn is_ascii(&self) -> bool {
4147 // We can treat each byte as a character here: all multibyte characters
4148 // start with a byte that is not in the ascii range, so we will stop
4150 self.bytes().all(|b| b.is_ascii())
4153 /// Checks that two strings are an ASCII case-insensitive match.
4155 /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
4156 /// but without allocating and copying temporaries.
4161 /// assert!("Ferris".eq_ignore_ascii_case("FERRIS"));
4162 /// assert!("Ferrös".eq_ignore_ascii_case("FERRöS"));
4163 /// assert!(!"Ferrös".eq_ignore_ascii_case("FERRÖS"));
4165 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
4167 pub fn eq_ignore_ascii_case(&self, other: &str) -> bool {
4168 self.as_bytes().eq_ignore_ascii_case(other.as_bytes())
4171 /// Converts this string to its ASCII upper case equivalent in-place.
4173 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
4174 /// but non-ASCII letters are unchanged.
4176 /// To return a new uppercased value without modifying the existing one, use
4177 /// [`to_ascii_uppercase`].
4179 /// [`to_ascii_uppercase`]: #method.to_ascii_uppercase
4184 /// let mut s = String::from("Grüße, Jürgen ❤");
4186 /// s.make_ascii_uppercase();
4188 /// assert_eq!("GRüßE, JüRGEN ❤", s);
4190 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
4191 pub fn make_ascii_uppercase(&mut self) {
4192 // SAFETY: changing ASCII letters only does not invalidate UTF-8.
4193 let me = unsafe { self.as_bytes_mut() };
4194 me.make_ascii_uppercase()
4197 /// Converts this string to its ASCII lower case equivalent in-place.
4199 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
4200 /// but non-ASCII letters are unchanged.
4202 /// To return a new lowercased value without modifying the existing one, use
4203 /// [`to_ascii_lowercase`].
4205 /// [`to_ascii_lowercase`]: #method.to_ascii_lowercase
4210 /// let mut s = String::from("GRÜßE, JÜRGEN ❤");
4212 /// s.make_ascii_lowercase();
4214 /// assert_eq!("grÜße, jÜrgen ❤", s);
4216 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
4217 pub fn make_ascii_lowercase(&mut self) {
4218 // SAFETY: changing ASCII letters only does not invalidate UTF-8.
4219 let me = unsafe { self.as_bytes_mut() };
4220 me.make_ascii_lowercase()
4223 /// Return an iterator that escapes each char in `self` with [`char::escape_debug`].
4225 /// Note: only extended grapheme codepoints that begin the string will be
4228 /// [`char::escape_debug`]: ../std/primitive.char.html#method.escape_debug
4235 /// for c in "❤\n!".escape_debug() {
4236 /// print!("{}", c);
4241 /// Using `println!` directly:
4244 /// println!("{}", "❤\n!".escape_debug());
4248 /// Both are equivalent to:
4251 /// println!("❤\\n!");
4254 /// Using `to_string`:
4257 /// assert_eq!("❤\n!".escape_debug().to_string(), "❤\\n!");
4259 #[stable(feature = "str_escape", since = "1.34.0")]
4260 pub fn escape_debug(&self) -> EscapeDebug<'_> {
4261 let mut chars = self.chars();
4265 .map(|first| first.escape_debug_ext(true))
4268 .chain(chars.flat_map(CharEscapeDebugContinue)),
4272 /// Return an iterator that escapes each char in `self` with [`char::escape_default`].
4274 /// [`char::escape_default`]: ../std/primitive.char.html#method.escape_default
4281 /// for c in "❤\n!".escape_default() {
4282 /// print!("{}", c);
4287 /// Using `println!` directly:
4290 /// println!("{}", "❤\n!".escape_default());
4294 /// Both are equivalent to:
4297 /// println!("\\u{{2764}}\\n!");
4300 /// Using `to_string`:
4303 /// assert_eq!("❤\n!".escape_default().to_string(), "\\u{2764}\\n!");
4305 #[stable(feature = "str_escape", since = "1.34.0")]
4306 pub fn escape_default(&self) -> EscapeDefault<'_> {
4307 EscapeDefault { inner: self.chars().flat_map(CharEscapeDefault) }
4310 /// Return an iterator that escapes each char in `self` with [`char::escape_unicode`].
4312 /// [`char::escape_unicode`]: ../std/primitive.char.html#method.escape_unicode
4319 /// for c in "❤\n!".escape_unicode() {
4320 /// print!("{}", c);
4325 /// Using `println!` directly:
4328 /// println!("{}", "❤\n!".escape_unicode());
4332 /// Both are equivalent to:
4335 /// println!("\\u{{2764}}\\u{{a}}\\u{{21}}");
4338 /// Using `to_string`:
4341 /// assert_eq!("❤\n!".escape_unicode().to_string(), "\\u{2764}\\u{a}\\u{21}");
4343 #[stable(feature = "str_escape", since = "1.34.0")]
4344 pub fn escape_unicode(&self) -> EscapeUnicode<'_> {
4345 EscapeUnicode { inner: self.chars().flat_map(CharEscapeUnicode) }
4351 struct CharEscapeDebugContinue impl Fn = |c: char| -> char::EscapeDebug {
4352 c.escape_debug_ext(false)
4356 struct CharEscapeUnicode impl Fn = |c: char| -> char::EscapeUnicode {
4360 struct CharEscapeDefault impl Fn = |c: char| -> char::EscapeDefault {
4365 #[stable(feature = "rust1", since = "1.0.0")]
4366 impl AsRef<[u8]> for str {
4368 fn as_ref(&self) -> &[u8] {
4373 #[stable(feature = "rust1", since = "1.0.0")]
4374 impl Default for &str {
4375 /// Creates an empty str
4376 fn default() -> Self {
4381 #[stable(feature = "default_mut_str", since = "1.28.0")]
4382 impl Default for &mut str {
4383 /// Creates an empty mutable str
4384 fn default() -> Self {
4385 // SAFETY: The empty string is valid UTF-8.
4386 unsafe { from_utf8_unchecked_mut(&mut []) }
4390 /// An iterator over the non-whitespace substrings of a string,
4391 /// separated by any amount of whitespace.
4393 /// This struct is created by the [`split_whitespace`] method on [`str`].
4394 /// See its documentation for more.
4396 /// [`split_whitespace`]: ../../std/primitive.str.html#method.split_whitespace
4397 /// [`str`]: ../../std/primitive.str.html
4398 #[stable(feature = "split_whitespace", since = "1.1.0")]
4399 #[derive(Clone, Debug)]
4400 pub struct SplitWhitespace<'a> {
4401 inner: Filter<Split<'a, IsWhitespace>, IsNotEmpty>,
4404 /// An iterator over the non-ASCII-whitespace substrings of a string,
4405 /// separated by any amount of ASCII whitespace.
4407 /// This struct is created by the [`split_ascii_whitespace`] method on [`str`].
4408 /// See its documentation for more.
4410 /// [`split_ascii_whitespace`]: ../../std/primitive.str.html#method.split_ascii_whitespace
4411 /// [`str`]: ../../std/primitive.str.html
4412 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
4413 #[derive(Clone, Debug)]
4414 pub struct SplitAsciiWhitespace<'a> {
4415 inner: Map<Filter<SliceSplit<'a, u8, IsAsciiWhitespace>, BytesIsNotEmpty>, UnsafeBytesToStr>,
4420 struct IsWhitespace impl Fn = |c: char| -> bool {
4425 struct IsAsciiWhitespace impl Fn = |byte: &u8| -> bool {
4426 byte.is_ascii_whitespace()
4430 struct IsNotEmpty impl<'a, 'b> Fn = |s: &'a &'b str| -> bool {
4435 struct BytesIsNotEmpty impl<'a, 'b> Fn = |s: &'a &'b [u8]| -> bool {
4440 struct UnsafeBytesToStr impl<'a> Fn = |bytes: &'a [u8]| -> &'a str {
4442 unsafe { from_utf8_unchecked(bytes) }
4446 #[stable(feature = "split_whitespace", since = "1.1.0")]
4447 impl<'a> Iterator for SplitWhitespace<'a> {
4448 type Item = &'a str;
4451 fn next(&mut self) -> Option<&'a str> {
4456 fn size_hint(&self) -> (usize, Option<usize>) {
4457 self.inner.size_hint()
4461 fn last(mut self) -> Option<&'a str> {
4466 #[stable(feature = "split_whitespace", since = "1.1.0")]
4467 impl<'a> DoubleEndedIterator for SplitWhitespace<'a> {
4469 fn next_back(&mut self) -> Option<&'a str> {
4470 self.inner.next_back()
4474 #[stable(feature = "fused", since = "1.26.0")]
4475 impl FusedIterator for SplitWhitespace<'_> {}
4477 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
4478 impl<'a> Iterator for SplitAsciiWhitespace<'a> {
4479 type Item = &'a str;
4482 fn next(&mut self) -> Option<&'a str> {
4487 fn size_hint(&self) -> (usize, Option<usize>) {
4488 self.inner.size_hint()
4492 fn last(mut self) -> Option<&'a str> {
4497 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
4498 impl<'a> DoubleEndedIterator for SplitAsciiWhitespace<'a> {
4500 fn next_back(&mut self) -> Option<&'a str> {
4501 self.inner.next_back()
4505 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
4506 impl FusedIterator for SplitAsciiWhitespace<'_> {}
4508 /// An iterator of [`u16`] over the string encoded as UTF-16.
4510 /// [`u16`]: ../../std/primitive.u16.html
4512 /// This struct is created by the [`encode_utf16`] method on [`str`].
4513 /// See its documentation for more.
4515 /// [`encode_utf16`]: ../../std/primitive.str.html#method.encode_utf16
4516 /// [`str`]: ../../std/primitive.str.html
4518 #[stable(feature = "encode_utf16", since = "1.8.0")]
4519 pub struct EncodeUtf16<'a> {
4524 #[stable(feature = "collection_debug", since = "1.17.0")]
4525 impl fmt::Debug for EncodeUtf16<'_> {
4526 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4527 f.pad("EncodeUtf16 { .. }")
4531 #[stable(feature = "encode_utf16", since = "1.8.0")]
4532 impl<'a> Iterator for EncodeUtf16<'a> {
4536 fn next(&mut self) -> Option<u16> {
4537 if self.extra != 0 {
4538 let tmp = self.extra;
4543 let mut buf = [0; 2];
4544 self.chars.next().map(|ch| {
4545 let n = ch.encode_utf16(&mut buf).len();
4547 self.extra = buf[1];
4554 fn size_hint(&self) -> (usize, Option<usize>) {
4555 let (low, high) = self.chars.size_hint();
4556 // every char gets either one u16 or two u16,
4557 // so this iterator is between 1 or 2 times as
4558 // long as the underlying iterator.
4559 (low, high.and_then(|n| n.checked_mul(2)))
4563 #[stable(feature = "fused", since = "1.26.0")]
4564 impl FusedIterator for EncodeUtf16<'_> {}
4566 /// The return type of [`str::escape_debug`].
4568 /// [`str::escape_debug`]: ../../std/primitive.str.html#method.escape_debug
4569 #[stable(feature = "str_escape", since = "1.34.0")]
4570 #[derive(Clone, Debug)]
4571 pub struct EscapeDebug<'a> {
4573 Flatten<option::IntoIter<char::EscapeDebug>>,
4574 FlatMap<Chars<'a>, char::EscapeDebug, CharEscapeDebugContinue>,
4578 /// The return type of [`str::escape_default`].
4580 /// [`str::escape_default`]: ../../std/primitive.str.html#method.escape_default
4581 #[stable(feature = "str_escape", since = "1.34.0")]
4582 #[derive(Clone, Debug)]
4583 pub struct EscapeDefault<'a> {
4584 inner: FlatMap<Chars<'a>, char::EscapeDefault, CharEscapeDefault>,
4587 /// The return type of [`str::escape_unicode`].
4589 /// [`str::escape_unicode`]: ../../std/primitive.str.html#method.escape_unicode
4590 #[stable(feature = "str_escape", since = "1.34.0")]
4591 #[derive(Clone, Debug)]
4592 pub struct EscapeUnicode<'a> {
4593 inner: FlatMap<Chars<'a>, char::EscapeUnicode, CharEscapeUnicode>,
4596 macro_rules! escape_types_impls {
4597 ($( $Name: ident ),+) => {$(
4598 #[stable(feature = "str_escape", since = "1.34.0")]
4599 impl<'a> fmt::Display for $Name<'a> {
4600 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4601 self.clone().try_for_each(|c| f.write_char(c))
4605 #[stable(feature = "str_escape", since = "1.34.0")]
4606 impl<'a> Iterator for $Name<'a> {
4610 fn next(&mut self) -> Option<char> { self.inner.next() }
4613 fn size_hint(&self) -> (usize, Option<usize>) { self.inner.size_hint() }
4616 fn try_fold<Acc, Fold, R>(&mut self, init: Acc, fold: Fold) -> R where
4617 Self: Sized, Fold: FnMut(Acc, Self::Item) -> R, R: Try<Ok=Acc>
4619 self.inner.try_fold(init, fold)
4623 fn fold<Acc, Fold>(self, init: Acc, fold: Fold) -> Acc
4624 where Fold: FnMut(Acc, Self::Item) -> Acc,
4626 self.inner.fold(init, fold)
4630 #[stable(feature = "str_escape", since = "1.34.0")]
4631 impl<'a> FusedIterator for $Name<'a> {}
4635 escape_types_impls!(EscapeDebug, EscapeDefault, EscapeUnicode);