1 // Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! A UTF-8 encoded, growable string.
13 //! This module contains the [`String`] type, a trait for converting
14 //! [`ToString`]s, and several error types that may result from working with
17 //! [`String`]: struct.String.html
18 //! [`ToString`]: trait.ToString.html
22 //! There are multiple ways to create a new `String` from a string literal:
25 //! let s = "Hello".to_string();
27 //! let s = String::from("world");
28 //! let s: String = "also this".into();
31 //! You can create a new `String` from an existing one by concatenating with
35 //! let s = "Hello".to_string();
37 //! let message = s + " world!";
40 //! If you have a vector of valid UTF-8 bytes, you can make a `String` out of
41 //! it. You can do the reverse too.
44 //! let sparkle_heart = vec![240, 159, 146, 150];
46 //! // We know these bytes are valid, so we'll use `unwrap()`.
47 //! let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
49 //! assert_eq!("💖", sparkle_heart);
51 //! let bytes = sparkle_heart.into_bytes();
53 //! assert_eq!(bytes, [240, 159, 146, 150]);
56 #![stable(feature = "rust1", since = "1.0.0")]
60 use core::iter::FromIterator;
62 use core::ops::{self, Add};
65 use core::str::pattern::Pattern;
66 use rustc_unicode::char::{decode_utf16, REPLACEMENT_CHARACTER};
67 use rustc_unicode::str as unicode_str;
70 use borrow::{Cow, IntoCow};
71 use range::RangeArgument;
72 use str::{self, FromStr, Utf8Error, Chars};
76 /// A UTF-8 encoded, growable string.
78 /// The `String` type is the most common string type that has ownership over the
79 /// contents of the string. It has a close relationship with its borrowed
80 /// counterpart, the primitive [`str`].
82 /// [`str`]: ../primitive.str.html
86 /// You can create a `String` from a literal string with `String::from`:
89 /// let hello = String::from("Hello, world!");
92 /// You can append a [`char`] to a `String` with the [`push()`] method, and
93 /// append a [`&str`] with the [`push_str()`] method:
96 /// let mut hello = String::from("Hello, ");
99 /// hello.push_str("orld!");
102 /// [`char`]: ../primitive.char.html
103 /// [`push()`]: #method.push
104 /// [`push_str()`]: #method.push_str
106 /// If you have a vector of UTF-8 bytes, you can create a `String` from it with
107 /// the [`from_utf8()`] method:
110 /// // some bytes, in a vector
111 /// let sparkle_heart = vec![240, 159, 146, 150];
113 /// // We know these bytes are valid, so we'll use `unwrap()`.
114 /// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
116 /// assert_eq!("💖", sparkle_heart);
119 /// [`from_utf8()`]: #method.from_utf8
123 /// `String`s are always valid UTF-8. This has a few implications, the first of
124 /// which is that if you need a non-UTF-8 string, consider [`OsString`]. It is
125 /// similar, but without the UTF-8 constraint. The second implication is that
126 /// you cannot index into a `String`:
131 /// println!("The first letter of s is {}", s[0]); // ERROR!!!
134 /// [`OsString`]: ../ffi/struct.OsString.html
136 /// Indexing is intended to be a constant-time operation, but UTF-8 encoding
137 /// does not allow us to do this. Furthermore, it's not clear what sort of
138 /// thing the index should return: a byte, a codepoint, or a grapheme cluster.
139 /// [`as_bytes()`] returns a slice of the bytes, and [`chars()`] returns an
140 /// iterator over the codepoints.
142 /// [`as_bytes()`]: #method.as_bytes
143 /// [`chars()`]: #method.chars
147 /// `String`s implement [`Deref`]`<Target=str>`, and so inherit all of [`str`]'s
148 /// methods. In addition, this means that you can pass a `String` to any
149 /// function which takes a [`&str`] by using an ampersand (`&`):
152 /// fn takes_str(s: &str) { }
154 /// let s = String::from("Hello");
159 /// [`&str`]: ../primitive.str.html
160 /// [`Deref`]: ../ops/trait.Deref.html
162 /// This will create a [`&str`] from the `String` and pass it in. This
163 /// conversion is very inexpensive, and so generally, functions will accept
164 /// [`&str`]s as arguments unless they need a `String` for some specific reason.
169 /// A `String` is made up of three components: a pointer to some bytes, a
170 /// length, and a capacity. The pointer points to an internal buffer `String`
171 /// uses to store its data. The length is the number of bytes currently stored
172 /// in the buffer, and the capacity is the size of the buffer in bytes. As such,
173 /// the length will always be less than or equal to the capacity.
175 /// This buffer is always stored on the heap.
177 /// You can look at these with the [`as_ptr()`], [`len()`], and [`capacity()`]
183 /// let story = String::from("Once upon a time...");
185 /// let ptr = story.as_ptr();
186 /// let len = story.len();
187 /// let capacity = story.capacity();
189 /// // story has nineteen bytes
190 /// assert_eq!(19, len);
192 /// // Now that we have our parts, we throw the story away.
193 /// mem::forget(story);
195 /// // We can re-build a String out of ptr, len, and capacity. This is all
196 /// // unsafe because we are responsible for making sure the components are
198 /// let s = unsafe { String::from_raw_parts(ptr as *mut _, len, capacity) } ;
200 /// assert_eq!(String::from("Once upon a time..."), s);
203 /// [`as_ptr()`]: #method.as_ptr
204 /// [`len()`]: #method.len
205 /// [`capacity()`]: #method.capacity
207 /// If a `String` has enough capacity, adding elements to it will not
208 /// re-allocate. For example, consider this program:
211 /// let mut s = String::new();
213 /// println!("{}", s.capacity());
216 /// s.push_str("hello");
217 /// println!("{}", s.capacity());
221 /// This will output the following:
232 /// At first, we have no memory allocated at all, but as we append to the
233 /// string, it increases its capacity appropriately. If we instead use the
234 /// [`with_capacity()`] method to allocate the correct capacity initially:
237 /// let mut s = String::with_capacity(25);
239 /// println!("{}", s.capacity());
242 /// s.push_str("hello");
243 /// println!("{}", s.capacity());
247 /// [`with_capacity()`]: #method.with_capacity
249 /// We end up with a different output:
260 /// Here, there's no need to allocate more memory inside the loop.
261 #[derive(PartialOrd, Eq, Ord)]
262 #[stable(feature = "rust1", since = "1.0.0")]
267 /// A possible error value when converting a `String` from a UTF-8 byte vector.
269 /// This type is the error type for the [`from_utf8()`] method on [`String`]. It
270 /// is designed in such a way to carefully avoid reallocations: the
271 /// [`into_bytes()`] method will give back the byte vector that was used in the
272 /// conversion attempt.
274 /// [`from_utf8()`]: struct.String.html#method.from_utf8
275 /// [`String`]: struct.String.html
276 /// [`into_bytes()`]: struct.FromUtf8Error.html#method.into_bytes
278 /// The [`Utf8Error`] type provided by [`std::str`] represents an error that may
279 /// occur when converting a slice of [`u8`]s to a [`&str`]. In this sense, it's
280 /// an analogue to `FromUtf8Error`, and you can get one from a `FromUtf8Error`
281 /// through the [`utf8_error()`] method.
283 /// [`Utf8Error`]: ../str/struct.Utf8Error.html
284 /// [`std::str`]: ../str/index.html
285 /// [`u8`]: ../primitive.u8.html
286 /// [`&str`]: ../primitive.str.html
287 /// [`utf8_error()`]: #method.utf8_error
294 /// // some invalid bytes, in a vector
295 /// let bytes = vec![0, 159];
297 /// let value = String::from_utf8(bytes);
299 /// assert!(value.is_err());
300 /// assert_eq!(vec![0, 159], value.unwrap_err().into_bytes());
302 #[stable(feature = "rust1", since = "1.0.0")]
304 pub struct FromUtf8Error {
309 /// A possible error value when converting a `String` from a UTF-16 encoded `u16` slice.
311 /// This type is the error type for the [`from_utf16()`] method on [`String`].
313 /// [`from_utf16()`]: struct.String.html#method.from_utf16
314 /// [`String`]: struct.String.html
321 /// // 𝄞mu<invalid>ic
322 /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075,
323 /// 0xD800, 0x0069, 0x0063];
325 /// assert!(String::from_utf16(v).is_err());
327 #[stable(feature = "rust1", since = "1.0.0")]
329 pub struct FromUtf16Error(());
332 /// Creates a new empty `String`.
334 /// Given that the `String` is empty, this will not allocate any initial
335 /// buffer. While that means that this initial operation is very
336 /// inexpensive, it may cause excessive allocation later, when you add
337 /// data. If you have an idea of how much data the `String` will hold,
338 /// consider the [`with_capacity()`] method to prevent excessive
341 /// [`with_capacity()`]: #method.with_capacity
348 /// let s = String::new();
351 #[stable(feature = "rust1", since = "1.0.0")]
352 pub fn new() -> String {
353 String { vec: Vec::new() }
356 /// Creates a new empty `String` with a particular capacity.
358 /// `String`s have an internal buffer to hold their data. The capacity is
359 /// the length of that buffer, and can be queried with the [`capacity()`]
360 /// method. This method creates an empty `String`, but one with an initial
361 /// buffer that can hold `capacity` bytes. This is useful when you may be
362 /// appending a bunch of data to the `String`, reducing the number of
363 /// reallocations it needs to do.
365 /// [`capacity()`]: #method.capacity
367 /// If the given capacity is `0`, no allocation will occur, and this method
368 /// is identical to the [`new()`] method.
370 /// [`new()`]: #method.new
377 /// let mut s = String::with_capacity(10);
379 /// // The String contains no chars, even though it has capacity for more
380 /// assert_eq!(s.len(), 0);
382 /// // These are all done without reallocating...
383 /// let cap = s.capacity();
388 /// assert_eq!(s.capacity(), cap);
390 /// // ...but this may make the string reallocate
394 #[stable(feature = "rust1", since = "1.0.0")]
395 pub fn with_capacity(capacity: usize) -> String {
396 String { vec: Vec::with_capacity(capacity) }
399 // HACK(japaric): with cfg(test) the inherent `[T]::to_vec` method, which is
400 // required for this method definition, is not available. Since we don't
401 // require this method for testing purposes, I'll just stub it
402 // NB see the slice::hack module in slice.rs for more information
405 pub fn from_str(_: &str) -> String {
406 panic!("not available with cfg(test)");
409 /// Converts a vector of bytes to a `String`.
411 /// A string slice ([`&str`]) is made of bytes ([`u8`]), and a vector of bytes
412 /// ([`Vec<u8>`]) is made of bytes, so this function converts between the
413 /// two. Not all byte slices are valid `String`s, however: `String`
414 /// requires that it is valid UTF-8. `from_utf8()` checks to ensure that
415 /// the bytes are valid UTF-8, and then does the conversion.
417 /// [`&str`]: ../primitive.str.html
418 /// [`u8`]: ../primitive.u8.html
419 /// [`Vec<u8>`]: ../vec/struct.Vec.html
421 /// If you are sure that the byte slice is valid UTF-8, and you don't want
422 /// to incur the overhead of the validity check, there is an unsafe version
423 /// of this function, [`from_utf8_unchecked()`], which has the same behavior
424 /// but skips the check.
426 /// [`from_utf8_unchecked()`]: struct.String.html#method.from_utf8_unchecked
428 /// This method will take care to not copy the vector, for efficiency's
431 /// If you need a `&str` instead of a `String`, consider
432 /// [`str::from_utf8()`].
434 /// [`str::from_utf8()`]: ../str/fn.from_utf8.html
438 /// Returns `Err` if the slice is not UTF-8 with a description as to why the
439 /// provided bytes are not UTF-8. The vector you moved in is also included.
446 /// // some bytes, in a vector
447 /// let sparkle_heart = vec![240, 159, 146, 150];
449 /// // We know these bytes are valid, so we'll use `unwrap()`.
450 /// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
452 /// assert_eq!("💖", sparkle_heart);
458 /// // some invalid bytes, in a vector
459 /// let sparkle_heart = vec![0, 159, 146, 150];
461 /// assert!(String::from_utf8(sparkle_heart).is_err());
464 /// See the docs for [`FromUtf8Error`] for more details on what you can do
467 /// [`FromUtf8Error`]: struct.FromUtf8Error.html
469 #[stable(feature = "rust1", since = "1.0.0")]
470 pub fn from_utf8(vec: Vec<u8>) -> Result<String, FromUtf8Error> {
471 match str::from_utf8(&vec) {
472 Ok(..) => Ok(String { vec: vec }),
482 /// Converts a slice of bytes to a `String`, including invalid characters.
484 /// A string slice ([`&str`]) is made of bytes ([`u8`]), and a slice of
485 /// bytes ([`&[u8]`][byteslice]) is made of bytes, so this function converts between
486 /// the two. Not all byte slices are valid string slices, however: [`&str`]
487 /// requires that it is valid UTF-8. During this conversion,
488 /// `from_utf8_lossy()` will replace any invalid UTF-8 sequences with
489 /// `U+FFFD REPLACEMENT CHARACTER`, which looks like this: �
491 /// [`&str`]: ../primitive.str.html
492 /// [`u8`]: ../primitive.u8.html
493 /// [byteslice]: ../primitive.slice.html
495 /// If you are sure that the byte slice is valid UTF-8, and you don't want
496 /// to incur the overhead of the conversion, there is an unsafe version
497 /// of this function, [`from_utf8_unchecked()`], which has the same behavior
498 /// but skips the checks.
500 /// [`from_utf8_unchecked()`]: struct.String.html#method.from_utf8_unchecked
502 /// If you need a [`&str`] instead of a `String`, consider
503 /// [`str::from_utf8()`].
505 /// [`str::from_utf8()`]: ../str/fn.from_utf8.html
512 /// // some bytes, in a vector
513 /// let sparkle_heart = vec![240, 159, 146, 150];
515 /// // We know these bytes are valid, so we'll use `unwrap()`.
516 /// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
518 /// assert_eq!("💖", sparkle_heart);
524 /// // some invalid bytes
525 /// let input = b"Hello \xF0\x90\x80World";
526 /// let output = String::from_utf8_lossy(input);
528 /// assert_eq!("Hello �World", output);
530 #[stable(feature = "rust1", since = "1.0.0")]
531 pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> Cow<'a, str> {
533 match str::from_utf8(v) {
534 Ok(s) => return Cow::Borrowed(s),
535 Err(e) => i = e.valid_up_to(),
538 const TAG_CONT_U8: u8 = 128;
539 const REPLACEMENT: &'static [u8] = b"\xEF\xBF\xBD"; // U+FFFD in UTF-8
541 fn unsafe_get(xs: &[u8], i: usize) -> u8 {
542 unsafe { *xs.get_unchecked(i) }
544 fn safe_get(xs: &[u8], i: usize, total: usize) -> u8 {
552 let mut res = String::with_capacity(total);
555 unsafe { res.as_mut_vec().extend_from_slice(&v[..i]) };
558 // subseqidx is the index of the first byte of the subsequence we're
559 // looking at. It's used to copy a bunch of contiguous good codepoints
560 // at once instead of copying them one by one.
561 let mut subseqidx = i;
565 let byte = unsafe_get(v, i);
568 macro_rules! error { () => ({
571 res.as_mut_vec().extend_from_slice(&v[subseqidx..i_]);
574 res.as_mut_vec().extend_from_slice(REPLACEMENT);
579 // subseqidx handles this
581 let w = unicode_str::utf8_char_width(byte);
585 if safe_get(v, i, total) & 192 != TAG_CONT_U8 {
592 match (byte, safe_get(v, i, total)) {
593 (0xE0, 0xA0...0xBF) => (),
594 (0xE1...0xEC, 0x80...0xBF) => (),
595 (0xED, 0x80...0x9F) => (),
596 (0xEE...0xEF, 0x80...0xBF) => (),
603 if safe_get(v, i, total) & 192 != TAG_CONT_U8 {
610 match (byte, safe_get(v, i, total)) {
611 (0xF0, 0x90...0xBF) => (),
612 (0xF1...0xF3, 0x80...0xBF) => (),
613 (0xF4, 0x80...0x8F) => (),
620 if safe_get(v, i, total) & 192 != TAG_CONT_U8 {
625 if safe_get(v, i, total) & 192 != TAG_CONT_U8 {
638 if subseqidx < total {
639 unsafe { res.as_mut_vec().extend_from_slice(&v[subseqidx..total]) };
644 /// Decode a UTF-16 encoded vector `v` into a `String`, returning `Err`
645 /// if `v` contains any invalid data.
653 /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075,
654 /// 0x0073, 0x0069, 0x0063];
655 /// assert_eq!(String::from("𝄞music"),
656 /// String::from_utf16(v).unwrap());
658 /// // 𝄞mu<invalid>ic
659 /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075,
660 /// 0xD800, 0x0069, 0x0063];
661 /// assert!(String::from_utf16(v).is_err());
663 #[stable(feature = "rust1", since = "1.0.0")]
664 pub fn from_utf16(v: &[u16]) -> Result<String, FromUtf16Error> {
665 decode_utf16(v.iter().cloned()).collect::<Result<_, _>>().map_err(|_| FromUtf16Error(()))
668 /// Decode a UTF-16 encoded vector `v` into a string, replacing
669 /// invalid data with the replacement character (U+FFFD).
676 /// // 𝄞mus<invalid>ic<invalid>
677 /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075,
678 /// 0x0073, 0xDD1E, 0x0069, 0x0063,
681 /// assert_eq!(String::from("𝄞mus\u{FFFD}ic\u{FFFD}"),
682 /// String::from_utf16_lossy(v));
685 #[stable(feature = "rust1", since = "1.0.0")]
686 pub fn from_utf16_lossy(v: &[u16]) -> String {
687 decode_utf16(v.iter().cloned()).map(|r| r.unwrap_or(REPLACEMENT_CHARACTER)).collect()
690 /// Creates a new `String` from a length, capacity, and pointer.
694 /// This is highly unsafe, due to the number of invariants that aren't
697 /// * The memory at `ptr` needs to have been previously allocated by the
698 /// same allocator the standard library uses.
699 /// * `length` needs to be less than or equal to `capacity`.
700 /// * `capacity` needs to be the correct value.
702 /// Violating these may cause problems like corrupting the allocator's
703 /// internal data structures.
713 /// let s = String::from("hello");
714 /// let ptr = s.as_ptr();
715 /// let len = s.len();
716 /// let capacity = s.capacity();
720 /// let s = String::from_raw_parts(ptr as *mut _, len, capacity);
722 /// assert_eq!(String::from("hello"), s);
726 #[stable(feature = "rust1", since = "1.0.0")]
727 pub unsafe fn from_raw_parts(buf: *mut u8, length: usize, capacity: usize) -> String {
728 String { vec: Vec::from_raw_parts(buf, length, capacity) }
731 /// Converts a vector of bytes to a `String` without checking that the
732 /// string contains valid UTF-8.
734 /// See the safe version, [`from_utf8()`], for more details.
736 /// [`from_utf8()`]: struct.String.html#method.from_utf8
740 /// This function is unsafe because it does not check that the bytes passed
741 /// to it are valid UTF-8. If this constraint is violated, it may cause
742 /// memory unsafety issues with future users of the `String`, as the rest of
743 /// the standard library assumes that `String`s are valid UTF-8.
750 /// // some bytes, in a vector
751 /// let sparkle_heart = vec![240, 159, 146, 150];
753 /// let sparkle_heart = unsafe {
754 /// String::from_utf8_unchecked(sparkle_heart)
757 /// assert_eq!("💖", sparkle_heart);
760 #[stable(feature = "rust1", since = "1.0.0")]
761 pub unsafe fn from_utf8_unchecked(bytes: Vec<u8>) -> String {
762 String { vec: bytes }
765 /// Converts a `String` into a byte vector.
767 /// This consumes the `String`, so we do not need to copy its contents.
774 /// let s = String::from("hello");
775 /// let bytes = s.into_bytes();
777 /// assert_eq!(&[104, 101, 108, 108, 111][..], &bytes[..]);
780 #[stable(feature = "rust1", since = "1.0.0")]
781 pub fn into_bytes(self) -> Vec<u8> {
785 /// Extracts a string slice containing the entire string.
787 #[stable(feature = "string_as_str", since = "1.7.0")]
788 pub fn as_str(&self) -> &str {
792 /// Extracts a mutable string slice containing the entire string.
794 #[stable(feature = "string_as_str", since = "1.7.0")]
795 pub fn as_mut_str(&mut self) -> &mut str {
799 /// Appends a given string slice onto the end of this `String`.
806 /// let mut s = String::from("foo");
808 /// s.push_str("bar");
810 /// assert_eq!("foobar", s);
813 #[stable(feature = "rust1", since = "1.0.0")]
814 pub fn push_str(&mut self, string: &str) {
815 self.vec.extend_from_slice(string.as_bytes())
818 /// Returns this `String`'s capacity, in bytes.
825 /// let s = String::with_capacity(10);
827 /// assert!(s.capacity() >= 10);
830 #[stable(feature = "rust1", since = "1.0.0")]
831 pub fn capacity(&self) -> usize {
835 /// Ensures that this `String`'s capacity is at least `additional` bytes
836 /// larger than its length.
838 /// The capacity may be increased by more than `additional` bytes if it
839 /// chooses, to prevent frequent reallocations.
841 /// If you do not want this "at least" behavior, see the [`reserve_exact()`]
844 /// [`reserve_exact()`]: #method.reserve_exact
848 /// Panics if the new capacity overflows `usize`.
855 /// let mut s = String::new();
859 /// assert!(s.capacity() >= 10);
862 /// This may not actually increase the capacity:
865 /// let mut s = String::with_capacity(10);
869 /// // s now has a length of 2 and a capacity of 10
870 /// assert_eq!(2, s.len());
871 /// assert_eq!(10, s.capacity());
873 /// // Since we already have an extra 8 capacity, calling this...
876 /// // ... doesn't actually increase.
877 /// assert_eq!(10, s.capacity());
880 #[stable(feature = "rust1", since = "1.0.0")]
881 pub fn reserve(&mut self, additional: usize) {
882 self.vec.reserve(additional)
885 /// Ensures that this `String`'s capacity is `additional` bytes
886 /// larger than its length.
888 /// Consider using the [`reserve()`] method unless you absolutely know
889 /// better than the allocator.
891 /// [`reserve()`]: #method.reserve
895 /// Panics if the new capacity overflows `usize`.
902 /// let mut s = String::new();
904 /// s.reserve_exact(10);
906 /// assert!(s.capacity() >= 10);
909 /// This may not actually increase the capacity:
912 /// let mut s = String::with_capacity(10);
916 /// // s now has a length of 2 and a capacity of 10
917 /// assert_eq!(2, s.len());
918 /// assert_eq!(10, s.capacity());
920 /// // Since we already have an extra 8 capacity, calling this...
921 /// s.reserve_exact(8);
923 /// // ... doesn't actually increase.
924 /// assert_eq!(10, s.capacity());
927 #[stable(feature = "rust1", since = "1.0.0")]
928 pub fn reserve_exact(&mut self, additional: usize) {
929 self.vec.reserve_exact(additional)
932 /// Shrinks the capacity of this `String` to match its length.
939 /// let mut s = String::from("foo");
942 /// assert!(s.capacity() >= 100);
944 /// s.shrink_to_fit();
945 /// assert_eq!(3, s.capacity());
948 #[stable(feature = "rust1", since = "1.0.0")]
949 pub fn shrink_to_fit(&mut self) {
950 self.vec.shrink_to_fit()
953 /// Appends the given `char` to the end of this `String`.
960 /// let mut s = String::from("abc");
966 /// assert_eq!("abc123", s);
969 #[stable(feature = "rust1", since = "1.0.0")]
970 pub fn push(&mut self, ch: char) {
971 match ch.len_utf8() {
972 1 => self.vec.push(ch as u8),
974 let cur_len = self.len();
975 // This may use up to 4 bytes.
976 self.vec.reserve(ch_len);
979 // Attempt to not use an intermediate buffer by just pushing bytes
980 // directly onto this string.
981 let slice = slice::from_raw_parts_mut(self.vec
983 .offset(cur_len as isize),
985 let used = ch.encode_utf8(slice).unwrap_or(0);
986 self.vec.set_len(cur_len + used);
992 /// Returns a byte slice of this `String`'s contents.
999 /// let s = String::from("hello");
1001 /// assert_eq!(&[104, 101, 108, 108, 111], s.as_bytes());
1004 #[stable(feature = "rust1", since = "1.0.0")]
1005 pub fn as_bytes(&self) -> &[u8] {
1009 /// Shortens this `String` to the specified length.
1013 /// Panics if `new_len` > current length, or if `new_len` does not lie on a
1014 /// [`char`] boundary.
1016 /// [`char`]: ../primitive.char.html
1023 /// let mut s = String::from("hello");
1027 /// assert_eq!("he", s);
1030 #[stable(feature = "rust1", since = "1.0.0")]
1031 pub fn truncate(&mut self, new_len: usize) {
1032 assert!(self.is_char_boundary(new_len));
1033 self.vec.truncate(new_len)
1036 /// Removes the last character from the string buffer and returns it.
1038 /// Returns `None` if this `String` is empty.
1045 /// let mut s = String::from("foo");
1047 /// assert_eq!(s.pop(), Some('o'));
1048 /// assert_eq!(s.pop(), Some('o'));
1049 /// assert_eq!(s.pop(), Some('f'));
1051 /// assert_eq!(s.pop(), None);
1054 #[stable(feature = "rust1", since = "1.0.0")]
1055 pub fn pop(&mut self) -> Option<char> {
1056 let len = self.len();
1061 let ch = self.char_at_reverse(len);
1063 self.vec.set_len(len - ch.len_utf8());
1068 /// Removes a `char` from this `String` at a byte position and returns it.
1070 /// This is an `O(n)` operation, as it requires copying every element in the
1075 /// Panics if `idx` is larger than or equal to the `String`'s length,
1076 /// or if it does not lie on a [`char`] boundary.
1078 /// [`char`]: ../primitive.char.html
1085 /// let mut s = String::from("foo");
1087 /// assert_eq!(s.remove(0), 'f');
1088 /// assert_eq!(s.remove(1), 'o');
1089 /// assert_eq!(s.remove(0), 'o');
1092 #[stable(feature = "rust1", since = "1.0.0")]
1093 pub fn remove(&mut self, idx: usize) -> char {
1094 let len = self.len();
1097 let ch = self.char_at(idx);
1098 let next = idx + ch.len_utf8();
1100 ptr::copy(self.vec.as_ptr().offset(next as isize),
1101 self.vec.as_mut_ptr().offset(idx as isize),
1103 self.vec.set_len(len - (next - idx));
1108 /// Inserts a character into this `String` at a byte position.
1110 /// This is an `O(n)` operation as it requires copying every element in the
1115 /// Panics if `idx` is larger than the `String`'s length, or if it does not
1116 /// lie on a [`char`] boundary.
1118 /// [`char`]: ../primitive.char.html
1125 /// let mut s = String::with_capacity(3);
1127 /// s.insert(0, 'f');
1128 /// s.insert(1, 'o');
1129 /// s.insert(2, 'o');
1131 /// assert_eq!("foo", s);
1134 #[stable(feature = "rust1", since = "1.0.0")]
1135 pub fn insert(&mut self, idx: usize, ch: char) {
1136 let len = self.len();
1137 assert!(idx <= len);
1138 assert!(self.is_char_boundary(idx));
1139 self.vec.reserve(4);
1140 let mut bits = [0; 4];
1141 let amt = ch.encode_utf8(&mut bits).unwrap();
1144 ptr::copy(self.vec.as_ptr().offset(idx as isize),
1145 self.vec.as_mut_ptr().offset((idx + amt) as isize),
1147 ptr::copy(bits.as_ptr(),
1148 self.vec.as_mut_ptr().offset(idx as isize),
1150 self.vec.set_len(len + amt);
1154 /// Returns a mutable reference to the contents of this `String`.
1158 /// This function is unsafe because it does not check that the bytes passed
1159 /// to it are valid UTF-8. If this constraint is violated, it may cause
1160 /// memory unsafety issues with future users of the `String`, as the rest of
1161 /// the standard library assumes that `String`s are valid UTF-8.
1168 /// let mut s = String::from("hello");
1171 /// let vec = s.as_mut_vec();
1172 /// assert_eq!(&[104, 101, 108, 108, 111][..], &vec[..]);
1176 /// assert_eq!(s, "olleh");
1179 #[stable(feature = "rust1", since = "1.0.0")]
1180 pub unsafe fn as_mut_vec(&mut self) -> &mut Vec<u8> {
1184 /// Returns the length of this `String`, in bytes.
1191 /// let a = String::from("foo");
1193 /// assert_eq!(a.len(), 3);
1196 #[stable(feature = "rust1", since = "1.0.0")]
1197 pub fn len(&self) -> usize {
1201 /// Returns `true` if this `String` has a length of zero.
1203 /// Returns `false` otherwise.
1210 /// let mut v = String::new();
1211 /// assert!(v.is_empty());
1214 /// assert!(!v.is_empty());
1217 #[stable(feature = "rust1", since = "1.0.0")]
1218 pub fn is_empty(&self) -> bool {
1222 /// Truncates this `String`, removing all contents.
1224 /// While this means the `String` will have a length of zero, it does not
1225 /// touch its capacity.
1232 /// let mut s = String::from("foo");
1236 /// assert!(s.is_empty());
1237 /// assert_eq!(0, s.len());
1238 /// assert_eq!(3, s.capacity());
1241 #[stable(feature = "rust1", since = "1.0.0")]
1242 pub fn clear(&mut self) {
1246 /// Create a draining iterator that removes the specified range in the string
1247 /// and yields the removed chars.
1249 /// Note: The element range is removed even if the iterator is not
1250 /// consumed until the end.
1254 /// Panics if the starting point or end point do not lie on a [`char`]
1255 /// boundary, or if they're out of bounds.
1257 /// [`char`]: ../primitive.char.html
1264 /// let mut s = String::from("α is alpha, β is beta");
1265 /// let beta_offset = s.find('β').unwrap_or(s.len());
1267 /// // Remove the range up until the β from the string
1268 /// let t: String = s.drain(..beta_offset).collect();
1269 /// assert_eq!(t, "α is alpha, ");
1270 /// assert_eq!(s, "β is beta");
1272 /// // A full range clears the string
1274 /// assert_eq!(s, "");
1276 #[stable(feature = "drain", since = "1.6.0")]
1277 pub fn drain<R>(&mut self, range: R) -> Drain
1278 where R: RangeArgument<usize>
1282 // The String version of Drain does not have the memory safety issues
1283 // of the vector version. The data is just plain bytes.
1284 // Because the range removal happens in Drop, if the Drain iterator is leaked,
1285 // the removal will not happen.
1286 let len = self.len();
1287 let start = *range.start().unwrap_or(&0);
1288 let end = *range.end().unwrap_or(&len);
1290 // Take out two simultaneous borrows. The &mut String won't be accessed
1291 // until iteration is over, in Drop.
1292 let self_ptr = self as *mut _;
1293 // slicing does the appropriate bounds checks
1294 let chars_iter = self[start..end].chars();
1304 /// Converts this `String` into a `Box<str>`.
1306 /// This will drop any excess capacity.
1313 /// let s = String::from("hello");
1315 /// let b = s.into_boxed_str();
1317 #[stable(feature = "box_str", since = "1.4.0")]
1318 pub fn into_boxed_str(self) -> Box<str> {
1319 let slice = self.vec.into_boxed_slice();
1320 unsafe { mem::transmute::<Box<[u8]>, Box<str>>(slice) }
1324 impl FromUtf8Error {
1325 /// Returns the bytes that were attempted to convert to a `String`.
1327 /// This method is carefully constructed to avoid allocation. It will
1328 /// consume the error, moving out the bytes, so that a copy of the bytes
1329 /// does not need to be made.
1336 /// // some invalid bytes, in a vector
1337 /// let bytes = vec![0, 159];
1339 /// let value = String::from_utf8(bytes);
1341 /// assert_eq!(vec![0, 159], value.unwrap_err().into_bytes());
1343 #[stable(feature = "rust1", since = "1.0.0")]
1344 pub fn into_bytes(self) -> Vec<u8> {
1348 /// Fetch a `Utf8Error` to get more details about the conversion failure.
1350 /// The [`Utf8Error`] type provided by [`std::str`] represents an error that may
1351 /// occur when converting a slice of [`u8`]s to a [`&str`]. In this sense, it's
1352 /// an analogue to `FromUtf8Error`. See its documentation for more details
1355 /// [`Utf8Error`]: ../str/struct.Utf8Error.html
1356 /// [`std::str`]: ../str/index.html
1357 /// [`u8`]: ../primitive.u8.html
1358 /// [`&str`]: ../primitive.str.html
1365 /// // some invalid bytes, in a vector
1366 /// let bytes = vec![0, 159];
1368 /// let error = String::from_utf8(bytes).unwrap_err().utf8_error();
1370 /// // the first byte is valid, but the second one is not
1371 /// assert_eq!(1, error.valid_up_to());
1373 #[stable(feature = "rust1", since = "1.0.0")]
1374 pub fn utf8_error(&self) -> Utf8Error {
1379 #[stable(feature = "rust1", since = "1.0.0")]
1380 impl fmt::Display for FromUtf8Error {
1381 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1382 fmt::Display::fmt(&self.error, f)
1386 #[stable(feature = "rust1", since = "1.0.0")]
1387 impl fmt::Display for FromUtf16Error {
1388 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1389 fmt::Display::fmt("invalid utf-16: lone surrogate found", f)
1393 #[stable(feature = "rust1", since = "1.0.0")]
1394 impl Clone for String {
1395 fn clone(&self) -> Self {
1396 String { vec: self.vec.clone() }
1399 fn clone_from(&mut self, source: &Self) {
1400 self.vec.clone_from(&source.vec);
/// Builds a `String` from an iterator of `char`s.
1404 #[stable(feature = "rust1", since = "1.0.0")]
1405 impl FromIterator<char> for String {
1406 fn from_iter<I: IntoIterator<Item = char>>(iterable: I) -> String {
// Start from an empty `String` and feed every item through `extend`.
1407 let mut buf = String::new();
1408 buf.extend(iterable);
/// Builds a `String` from an iterator of string slices.
1413 #[stable(feature = "rust1", since = "1.0.0")]
1414 impl<'a> FromIterator<&'a str> for String {
1415 fn from_iter<I: IntoIterator<Item = &'a str>>(iterable: I) -> String {
// Start from an empty `String` and feed every slice through `extend`.
1416 let mut buf = String::new();
1417 buf.extend(iterable);
/// Builds a `String` from an iterator of owned `String`s.
1422 #[stable(feature = "extend_string", since = "1.4.0")]
1423 impl FromIterator<String> for String {
1424 fn from_iter<I: IntoIterator<Item = String>>(iterable: I) -> String {
// Start from an empty `String` and feed every item through `extend`.
1425 let mut buf = String::new();
1426 buf.extend(iterable);
/// Extends a `String` with the `char`s yielded by an iterator.
1431 #[stable(feature = "rust1", since = "1.0.0")]
1432 impl Extend<char> for String {
1433 fn extend<I: IntoIterator<Item = char>>(&mut self, iterable: I) {
1434 let iterator = iterable.into_iter();
// Only the lower bound of the size hint is used; the upper bound is ignored.
1435 let (lower_bound, _) = iterator.size_hint();
// Reserve once, before the loop, based on that lower bound.
// NOTE(review): `lower_bound` counts `char`s while `reserve` presumably
// counts bytes; every char needs at least one byte, so this looks like a
// deliberate minimum rather than an exact size — confirm.
1436 self.reserve(lower_bound);
1437 for ch in iterator {
/// Extends a `String` from an iterator of `&char`.
1443 #[stable(feature = "extend_ref", since = "1.2.0")]
1444 impl<'a> Extend<&'a char> for String {
1445 fn extend<I: IntoIterator<Item = &'a char>>(&mut self, iterable: I) {
// `cloned()` turns the `&char` items into `char`s, so the call can be
// forwarded to `extend` with an iterator of owned chars.
1446 self.extend(iterable.into_iter().cloned());
/// Extends a `String` with the string slices yielded by an iterator.
// NOTE(review): the method body is not shown here; presumably each slice is
// appended in iteration order — confirm.
1450 #[stable(feature = "rust1", since = "1.0.0")]
1451 impl<'a> Extend<&'a str> for String {
1452 fn extend<I: IntoIterator<Item = &'a str>>(&mut self, iterable: I) {
/// Extends a `String` with the owned `String`s yielded by an iterator.
// NOTE(review): the method body is not shown here; presumably each item is
// appended in iteration order — confirm.
1459 #[stable(feature = "extend_string", since = "1.4.0")]
1460 impl Extend<String> for String {
1461 fn extend<I: IntoIterator<Item = String>>(&mut self, iterable: I) {
1468 /// A convenience impl that delegates to the impl for `&str`.
///
/// Every method slices the `String` with `self[..]` and calls the method of
/// the same name on the result, passing `haystack` through unchanged.
1469 #[unstable(feature = "pattern",
1470 reason = "API not fully fleshed out and ready to be stabilized",
1472 impl<'a, 'b> Pattern<'a> for &'b String {
// Reuse the searcher type that `&'b str` declares for the same haystack lifetime.
1473 type Searcher = <&'b str as Pattern<'a>>::Searcher;
// Builds the searcher for `haystack` from the sliced string.
1475 fn into_searcher(self, haystack: &'a str) -> <&'b str as Pattern<'a>>::Searcher {
1476 self[..].into_searcher(haystack)
// Delegates the "contained in `haystack`" test to the sliced string.
1480 fn is_contained_in(self, haystack: &'a str) -> bool {
1481 self[..].is_contained_in(haystack)
// Delegates the "prefix of `haystack`" test to the sliced string.
1485 fn is_prefix_of(self, haystack: &'a str) -> bool {
1486 self[..].is_prefix_of(haystack)
/// Equality between two `String`s, decided by comparing `self[..]` with
/// `other[..]` (both sides are sliced before the comparison).
1490 #[stable(feature = "rust1", since = "1.0.0")]
1491 impl PartialEq for String {
// `==`: forwards to `PartialEq::eq` on the two slices.
1493 fn eq(&self, other: &String) -> bool {
1494 PartialEq::eq(&self[..], &other[..])
// `!=`: forwards to `PartialEq::ne` on the two slices rather than negating `eq`.
1497 fn ne(&self, other: &String) -> bool {
1498 PartialEq::ne(&self[..], &other[..])
// Generates `PartialEq` impls in both directions for a pair of types, so that
// `$lhs == $rhs` and `$rhs == $lhs` both compile. Each generated method slices
// both operands with `[..]` and compares the results, so both types must
// support that indexing. The generated impl headers declare the lifetimes
// `'a` and `'b`, which the type arguments may refer to.
1502 macro_rules! impl_eq {
1503 ($lhs:ty, $rhs: ty) => {
// Direction 1: `$lhs` on the left, `$rhs` on the right.
1504 #[stable(feature = "rust1", since = "1.0.0")]
1505 impl<'a, 'b> PartialEq<$rhs> for $lhs {
1507 fn eq(&self, other: &$rhs) -> bool { PartialEq::eq(&self[..], &other[..]) }
1509 fn ne(&self, other: &$rhs) -> bool { PartialEq::ne(&self[..], &other[..]) }
// Direction 2: the mirrored impl with the operand types swapped.
1512 #[stable(feature = "rust1", since = "1.0.0")]
1513 impl<'a, 'b> PartialEq<$lhs> for $rhs {
1515 fn eq(&self, other: &$lhs) -> bool { PartialEq::eq(&self[..], &other[..]) }
1517 fn ne(&self, other: &$lhs) -> bool { PartialEq::ne(&self[..], &other[..]) }
// Instantiate the cross-type comparisons: `String` against `str` and `&str`,
// and `Cow<str>` against `str`, `&str` and `String`. The lifetimes `'a`/`'b`
// written here are the ones declared by the `impl<'a, 'b>` headers that
// `impl_eq!` expands to.
1523 impl_eq! { String, str }
1524 impl_eq! { String, &'a str }
1525 impl_eq! { Cow<'a, str>, str }
1526 impl_eq! { Cow<'a, str>, &'b str }
1527 impl_eq! { Cow<'a, str>, String }
/// Provides the default value of a `String`.
// NOTE(review): the returned expression is not shown here; presumably an
// empty string — confirm.
1529 #[stable(feature = "rust1", since = "1.0.0")]
1530 impl Default for String {
1532 fn default() -> String {
/// Displays a `String` by delegating to the `Display` impl of its `str` contents.
1537 #[stable(feature = "rust1", since = "1.0.0")]
1538 impl fmt::Display for String {
1540 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// `&**self`: `&String` -> `String` -> `str` (through `Deref`), re-borrowed as `&str`.
1541 fmt::Display::fmt(&**self, f)
/// Debug-formats a `String` by delegating to the `Debug` impl of its `str` contents.
1545 #[stable(feature = "rust1", since = "1.0.0")]
1546 impl fmt::Debug for String {
1548 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// `&**self`: `&String` -> `String` -> `str` (through `Deref`), re-borrowed as `&str`.
1549 fmt::Debug::fmt(&**self, f)
/// Hashes a `String` by hashing its `str` contents.
1553 #[stable(feature = "rust1", since = "1.0.0")]
1554 impl hash::Hash for String {
1556 fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
// `**self` is the underlying `str`, so the `str` hashing impl feeds `hasher`.
1557 (**self).hash(hasher)
/// Implements `String + &str`, producing a `String`.
///
/// The left-hand `String` is taken by value (`mut self`) and `other` is
/// appended to it with `push_str`.
1561 #[stable(feature = "rust1", since = "1.0.0")]
1562 impl<'a> Add<&'a str> for String {
1563 type Output = String;
1566 fn add(mut self, other: &str) -> String {
// Append in place to the operand that was moved in.
1567 self.push_str(other);
/// Indexes a `String` with a `Range<usize>` (`a..b`), yielding a `&str`.
1572 #[stable(feature = "rust1", since = "1.0.0")]
1573 impl ops::Index<ops::Range<usize>> for String {
1577 fn index(&self, index: ops::Range<usize>) -> &str {
/// Indexes a `String` with a `RangeTo<usize>` (`..b`), yielding a `&str`.
1581 #[stable(feature = "rust1", since = "1.0.0")]
1582 impl ops::Index<ops::RangeTo<usize>> for String {
1586 fn index(&self, index: ops::RangeTo<usize>) -> &str {
/// Indexes a `String` with a `RangeFrom<usize>` (`a..`), yielding a `&str`.
1590 #[stable(feature = "rust1", since = "1.0.0")]
1591 impl ops::Index<ops::RangeFrom<usize>> for String {
1595 fn index(&self, index: ops::RangeFrom<usize>) -> &str {
/// Indexes a `String` with `..`, yielding its contents as a `&str`.
1599 #[stable(feature = "rust1", since = "1.0.0")]
1600 impl ops::Index<ops::RangeFull> for String {
1604 fn index(&self, _index: ops::RangeFull) -> &str {
// SAFETY: `from_utf8_unchecked` performs no validation of `self.vec`; this
// relies on `vec` always holding valid UTF-8 (the type is documented as a
// UTF-8 encoded string).
1605 unsafe { str::from_utf8_unchecked(&self.vec) }
/// Mutably indexes a `String` with a `Range<usize>` (`a..b`), yielding a `&mut str`.
1609 #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1610 impl ops::IndexMut<ops::Range<usize>> for String {
1612 fn index_mut(&mut self, index: ops::Range<usize>) -> &mut str {
// Two steps: `self[..]` borrows the whole string as a `str`, then `index`
// is applied to that slice, so range handling is whatever `str` does.
1613 &mut self[..][index]
/// Mutably indexes a `String` with a `RangeTo<usize>` (`..b`), yielding a `&mut str`.
1616 #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1617 impl ops::IndexMut<ops::RangeTo<usize>> for String {
1619 fn index_mut(&mut self, index: ops::RangeTo<usize>) -> &mut str {
// Two steps: `self[..]` borrows the whole string as a `str`, then `index`
// is applied to that slice, so range handling is whatever `str` does.
1620 &mut self[..][index]
/// Mutably indexes a `String` with a `RangeFrom<usize>` (`a..`), yielding a `&mut str`.
1623 #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1624 impl ops::IndexMut<ops::RangeFrom<usize>> for String {
1626 fn index_mut(&mut self, index: ops::RangeFrom<usize>) -> &mut str {
// Two steps: `self[..]` borrows the whole string as a `str`, then `index`
// is applied to that slice, so range handling is whatever `str` does.
1627 &mut self[..][index]
/// Mutably indexes a `String` with `..`, yielding its contents as a `&mut str`.
1630 #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1631 impl ops::IndexMut<ops::RangeFull> for String {
1633 fn index_mut(&mut self, _index: ops::RangeFull) -> &mut str {
// SAFETY: reinterprets the mutable borrow of `self.vec`'s contents as
// `&mut str` with no validation; relies on `vec` always holding valid UTF-8.
// NOTE(review): assumes the borrowed byte slice and `&mut str` share a
// layout, which `transmute` requires — confirm.
1634 unsafe { mem::transmute(&mut *self.vec) }
/// Dereferences a `String` to a `&str` over its contents.
1638 #[stable(feature = "rust1", since = "1.0.0")]
1639 impl ops::Deref for String {
1643 fn deref(&self) -> &str {
// SAFETY: `from_utf8_unchecked` performs no validation of `self.vec`; this
// relies on `vec` always holding valid UTF-8 (the type is documented as a
// UTF-8 encoded string).
1644 unsafe { str::from_utf8_unchecked(&self.vec) }
/// Mutably dereferences a `String` to a `&mut str` over its contents.
1648 #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1649 impl ops::DerefMut for String {
1651 fn deref_mut(&mut self) -> &mut str {
// SAFETY: reinterprets the mutable borrow of `self.vec`'s contents as
// `&mut str` with no validation; relies on `vec` always holding valid UTF-8.
// NOTE(review): assumes the borrowed byte slice and `&mut str` share a
// layout, which `transmute` requires — confirm.
1652 unsafe { mem::transmute(&mut *self.vec) }
1656 /// An error when parsing a `String`.
///
1658 /// This `enum` is slightly awkward: it will never actually exist. This error is
1659 /// part of the type signature of the implementation of [`FromStr`] on
1660 /// [`String`]. The return type of [`from_str()`] requires that an error be
1661 /// defined, but, given that a [`String`] can always be made into a new
1662 /// [`String`] without error, this type will never actually be returned. As
1663 /// such, it is only here to satisfy said signature, and is useless otherwise.
///
/// The enum is declared with no variants, so no value of this type can be
/// constructed.
///
1665 /// [`FromStr`]: ../str/trait.FromStr.html
1666 /// [`String`]: struct.String.html
1667 /// [`from_str()`]: ../str/trait.FromStr.html#tymethod.from_str
1668 #[stable(feature = "str_parse_error", since = "1.5.0")]
1670 pub enum ParseError {}
/// Parses a `String` from a `&str`.
///
/// The error type is `ParseError`, which has no variants, so the `Err` case
/// of the returned `Result` can never be produced.
1672 #[stable(feature = "rust1", since = "1.0.0")]
1673 impl FromStr for String {
1674 type Err = ParseError;
1676 fn from_str(s: &str) -> Result<String, ParseError> {
/// Lets `ParseError` satisfy `Clone` bounds. The enum has no variants, so
/// there is never an actual value for `clone` to be called on.
1681 #[stable(feature = "str_parse_error", since = "1.5.0")]
1682 impl Clone for ParseError {
1683 fn clone(&self) -> ParseError {
/// Lets `ParseError` satisfy `Debug` bounds. The enum has no variants, so
/// there is never an actual value to format; the formatter argument is unused.
1688 #[stable(feature = "str_parse_error", since = "1.5.0")]
1689 impl fmt::Debug for ParseError {
1690 fn fmt(&self, _: &mut fmt::Formatter) -> fmt::Result {
/// Lets `ParseError` satisfy `Display` bounds. The enum has no variants, so
/// there is never an actual value to format; the formatter argument is unused.
1695 #[stable(feature = "str_parse_error2", since = "1.8.0")]
1696 impl fmt::Display for ParseError {
1697 fn fmt(&self, _: &mut fmt::Formatter) -> fmt::Result {
/// Lets `ParseError` satisfy `PartialEq` bounds. The enum has no variants, so
/// there are never two values to compare; the `other` argument is unused.
1702 #[stable(feature = "str_parse_error", since = "1.5.0")]
1703 impl PartialEq for ParseError {
1704 fn eq(&self, _: &ParseError) -> bool {
/// Marker impl: declares `ParseError` as `Eq`. The impl body is empty.
1709 #[stable(feature = "str_parse_error", since = "1.5.0")]
1710 impl Eq for ParseError {}
1712 /// A trait for converting a value to a `String`.
///
1714 /// This trait is automatically implemented for any type which implements the
1715 /// [`Display`] trait. As such, `ToString` shouldn't be implemented directly:
1716 /// [`Display`] should be implemented instead, and you get the `ToString`
1717 /// implementation for free.
///
1719 /// [`Display`]: ../fmt/trait.Display.html
1720 #[stable(feature = "rust1", since = "1.0.0")]
1721 pub trait ToString {
1722 /// Converts the given value to a `String`.
///
/// # Examples
///
/// ```
/// let i = 5;
1730 /// let five = String::from("5");
///
1732 /// assert_eq!(five, i.to_string());
/// ```
1734 #[stable(feature = "rust1", since = "1.0.0")]
1735 fn to_string(&self) -> String;
/// Blanket impl: every type implementing `Display` gets `to_string`,
/// including unsized types (the `?Sized` bound).
1738 #[stable(feature = "rust1", since = "1.0.0")]
1739 impl<T: fmt::Display + ?Sized> ToString for T {
1741 fn to_string(&self) -> String {
// Brings the `Write` trait into scope so `write_fmt` can be called on `buf`.
1742 use core::fmt::Write;
1743 let mut buf = String::new();
// Format `self` with `{}` into `buf`. The `fmt::Result` is deliberately
// discarded via `let _`, so a formatting error is not reported to the caller.
1744 let _ = buf.write_fmt(format_args!("{}", self));
// Shrink the buffer once formatting has finished.
1745 buf.shrink_to_fit();
/// Borrows a `String` as a `&str`.
1750 #[stable(feature = "rust1", since = "1.0.0")]
1751 impl AsRef<str> for String {
1753 fn as_ref(&self) -> &str {
/// Borrows a `String` as a byte slice (`&[u8]`).
1758 #[stable(feature = "rust1", since = "1.0.0")]
1759 impl AsRef<[u8]> for String {
1761 fn as_ref(&self) -> &[u8] {
/// Creates an owned `String` from a string slice.
1766 #[stable(feature = "rust1", since = "1.0.0")]
1767 impl<'a> From<&'a str> for String {
// Regular definition: copies the bytes of `s` into a new vector with
// `to_vec` and wraps that vector in a `String`.
1770 fn from(s: &'a str) -> String {
1771 String { vec: <[_]>::to_vec(s.as_bytes()) }
1774 // HACK(japaric): with cfg(test) the inherent `[T]::to_vec` method, which is
1775 // required for this method definition, is not available. Since we don't
1776 // require this method for testing purposes, I'll just stub it
1777 // NB see the slice::hack module in slice.rs for more information
// Stub described by the HACK note: ignores its argument and always panics.
1780 fn from(_: &str) -> String {
1781 panic!("not available with cfg(test)");
/// Converts a `&'a str` into a `Cow<'a, str>` with the same lifetime.
// NOTE(review): the body is not shown here; presumably the slice is wrapped
// as the borrowed variant without copying — confirm.
1785 #[stable(feature = "rust1", since = "1.0.0")]
1786 impl<'a> From<&'a str> for Cow<'a, str> {
1788 fn from(s: &'a str) -> Cow<'a, str> {
/// Converts an owned `String` into a `Cow<'a, str>`; the `String` is taken by value.
// NOTE(review): the body is not shown here; presumably the string is wrapped
// as the owned variant — confirm.
1793 #[stable(feature = "rust1", since = "1.0.0")]
1794 impl<'a> From<String> for Cow<'a, str> {
1796 fn from(s: String) -> Cow<'a, str> {
/// Converts a `String` into a `Vec<u8>`; the `String` is consumed (`self` by value).
1801 #[stable(feature = "rust1", since = "1.0.0")]
1802 impl Into<Vec<u8>> for String {
1803 fn into(self) -> Vec<u8> {
/// Converts an owned `String` into a `Cow<'static, str>`, consuming it.
///
/// The `unstable` attribute's reason notes that this trait may be replaced by
/// `convert::Into`; `#[allow(deprecated)]` silences the deprecation lint for
/// this impl.
1808 #[unstable(feature = "into_cow", reason = "may be replaced by `convert::Into`",
1810 #[allow(deprecated)]
1811 impl IntoCow<'static, str> for String {
1813 fn into_cow(self) -> Cow<'static, str> {
/// Converts a `&'a str` into a `Cow<'a, str>` with the same lifetime.
///
/// The `unstable` attribute's reason notes that this trait may be replaced by
/// `convert::Into`; `#[allow(deprecated)]` silences the deprecation lint for
/// this impl.
1818 #[unstable(feature = "into_cow", reason = "may be replaced by `convert::Into`",
1820 #[allow(deprecated)]
1821 impl<'a> IntoCow<'a, str> for &'a str {
1823 fn into_cow(self) -> Cow<'a, str> {
/// Lets a `String` be the target of formatted writes.
// NOTE(review): neither method body is shown here; presumably both append to
// the string and report success — confirm.
1828 #[stable(feature = "rust1", since = "1.0.0")]
1829 impl fmt::Write for String {
// Receives a string slice to write into `self`.
1831 fn write_str(&mut self, s: &str) -> fmt::Result {
// Receives a single `char` to write into `self`.
1837 fn write_char(&mut self, c: char) -> fmt::Result {
1843 /// A draining iterator for `String`.
///
/// Yields `char`s. When the `Drain` is dropped, the range it was created for
/// is removed from the source `String`.
1844 #[stable(feature = "drain", since = "1.6.0")]
1845 pub struct Drain<'a> {
1846 /// Will be used as &'a mut String in the destructor
1847 string: *mut String,
1848 /// Start of part to remove (lower bound of the range drained in `Drop`)
1850 /// End of part to remove (upper bound of the range drained in `Drop`)
1852 /// Current remaining range to remove
// `Drain` stores a raw pointer (`*mut String`), so `Sync` and `Send` are
// asserted by hand with `unsafe impl`.
// NOTE(review): soundness presumably rests on the pointer being used like the
// `&'a mut String` mentioned on the `string` field — confirm.
1856 #[stable(feature = "drain", since = "1.6.0")]
1857 unsafe impl<'a> Sync for Drain<'a> {}
1858 #[stable(feature = "drain", since = "1.6.0")]
1859 unsafe impl<'a> Send for Drain<'a> {}
/// Removes the drained range from the source `String` when the `Drain` is dropped.
1861 #[stable(feature = "drain", since = "1.6.0")]
1862 impl<'a> Drop for Drain<'a> {
1863 fn drop(&mut self) {
1865 // Use Vec::drain. "Reaffirm" the bounds checks to avoid
1866 // panic code being inserted again.
// `self.string` is a raw pointer; it is dereferenced here to reach the
// `String`'s underlying byte vector.
1867 let self_vec = (*self.string).as_mut_vec();
// Nothing is removed unless `start <= end <= len` holds for the stored bounds.
1868 if self.start <= self.end && self.end <= self_vec.len() {
1869 self_vec.drain(self.start..self.end);
/// Iterates over the `char`s of the range being drained.
1875 #[stable(feature = "drain", since = "1.6.0")]
1876 impl<'a> Iterator for Drain<'a> {
// Yields the next `char` from the front, or `None` when exhausted.
1880 fn next(&mut self) -> Option<char> {
// Reports exactly the size hint of the inner `iter` field.
1884 fn size_hint(&self) -> (usize, Option<usize>) {
1885 self.iter.size_hint()
/// Allows a `Drain` to be consumed from the back as well as the front.
1889 #[stable(feature = "drain", since = "1.6.0")]
1890 impl<'a> DoubleEndedIterator for Drain<'a> {
1892 fn next_back(&mut self) -> Option<char> {
// Forwarded unchanged to the inner `iter` field.
1893 self.iter.next_back()