Improve documentation for the from_utf8 family

author Steve Klabnik <steve@steveklabnik.com>

Fri, 2 Oct 2015 18:36:02 +0000 (14:36 -0400)

committer Steve Klabnik <steve@steveklabnik.com>

Fri, 2 Oct 2015 23:42:25 +0000 (19:42 -0400)
author Steve Klabnik <steve@steveklabnik.com>
Fri, 2 Oct 2015 18:36:02 +0000 (14:36 -0400)
committer Steve Klabnik <steve@steveklabnik.com>
Fri, 2 Oct 2015 23:42:25 +0000 (19:42 -0400)
diff --git a/src/libcollections/string.rs b/src/libcollections/string.rs

index acbce825ecc3f71bb699605674f5a2b82ff07d7a..271dbffcc2e41217342112f0d12cb6109730a109 100644 (file)
--- a/src/libcollections/string.rs
+++ b/src/libcollections/string.rs
@@ -92,26 +92,61 @@ pub fn from_str(_: &str) -> String {
          panic!("not available with cfg(test)");
      }
  
-    /// Returns the vector as a string buffer, if possible, taking care not to
-    /// copy it.
+    /// Converts a vector of bytes to a `String`.
+    ///
+    /// A string (`String`) is made of bytes (`u8`), and a vector of bytes
+    /// (`Vec<u8>`) is made of bytes, so this function converts between the
+    /// two. Not all byte vectors are valid `String`s, however: `String`
+    /// requires that it is valid UTF-8. `from_utf8()` checks to ensure that
+    /// the bytes are valid UTF-8, and then does the conversion.
+    ///
+    /// If you are sure that the byte slice is valid UTF-8, and you don't want
+    /// to incur the overhead of the validity check, there is an unsafe version
+    /// of this function, [`from_utf8_unchecked()`][fromutf8], which has the
+    /// same behavior but skips the check.
+    ///
+    /// [fromutf8]: struct.String.html#method.from_utf8_unchecked
+    ///
+    /// This method will take care to not copy the vector, for efficiency's
+    /// sake.
+    ///
+    /// If you need a `&str` instead of a `String`, consider
+    /// [`str::from_utf8()`][str].
+    ///
+    /// [str]: ../str/fn.from_utf8.html
      ///
      /// # Failure
      ///
-    /// If the given vector is not valid UTF-8, then the original vector and the
-    /// corresponding error is returned.
+    /// Returns `Err` if the slice is not UTF-8 with a description as to why the
+    /// provided bytes are not UTF-8. The vector you moved in is also included.
      ///
      /// # Examples
      ///
+    /// Basic usage:
+    ///
      /// ```
-    /// let hello_vec = vec![104, 101, 108, 108, 111];
-    /// let s = String::from_utf8(hello_vec).unwrap();
-    /// assert_eq!(s, "hello");
-    ///
-    /// let invalid_vec = vec![240, 144, 128];
-    /// let s = String::from_utf8(invalid_vec).err().unwrap();
-    /// let err = s.utf8_error();
-    /// assert_eq!(s.into_bytes(), [240, 144, 128]);
+    /// // some bytes, in a vector
+    /// let sparkle_heart = vec![240, 159, 146, 150];
+    ///
+    /// // We know these bytes are valid, so just use `unwrap()`.
+    /// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
+    ///
+    /// assert_eq!("💖", sparkle_heart);
      /// ```
+    ///
+    /// Incorrect bytes:
+    ///
+    /// ```
+    /// // some invalid bytes, in a vector
+    /// let sparkle_heart = vec![0, 159, 146, 150];
+    ///
+    /// assert!(String::from_utf8(sparkle_heart).is_err());
+    /// ```
+    ///
+    /// See the docs for [`FromUtf8Error`][error] for more details on what you
+    /// can do with this error.
+    ///
+    /// [error]: struct.FromUtf8Error.html
      #[inline]
      #[stable(feature = "rust1", since = "1.0.0")]
      pub fn from_utf8(vec: Vec<u8>) -> Result<String, FromUtf8Error> {
@@ -121,15 +156,49 @@ pub fn from_utf8(vec: Vec<u8>) -> Result<String, FromUtf8Error> {
          }
      }
  
-    /// Converts a vector of bytes to a new UTF-8 string.
-    /// Any invalid UTF-8 sequences are replaced with U+FFFD REPLACEMENT CHARACTER.
+    /// Converts a slice of bytes to a `String`, including invalid characters.
+    ///
+    /// A string slice (`&str`) is made of bytes (`u8`), and a slice of bytes
+    /// (`&[u8]`) is made of bytes, so this function converts between the two.
+    /// Not all byte slices are valid string slices, however: `&str` requires
+    /// that it is valid UTF-8. During this conversion, `from_utf8_lossy()`
+    /// will replace any invalid UTF-8 sequences with
+    /// `U+FFFD REPLACEMENT CHARACTER`, which looks like this: �
+    ///
+    /// If you are sure that the byte slice is valid UTF-8, and you don't want
+    /// to incur the overhead of the conversion, there is an unsafe version
+    /// of this function, [`from_utf8_unchecked()`][fromutf8], which has the
+    /// same behavior but skips the checks.
+    ///
+    /// [fromutf8]: struct.String.html#method.from_utf8_unchecked
+    ///
+    /// If you need a `&str` instead of a `String`, consider
+    /// [`str::from_utf8()`][str].
+    ///
+    /// [str]: ../str/fn.from_utf8.html
      ///
      /// # Examples
      ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// // some bytes, in a vector
+    /// let sparkle_heart = vec![240, 159, 146, 150];
+    ///
+    /// // These bytes are valid UTF-8, so nothing gets replaced.
+    /// let sparkle_heart = String::from_utf8_lossy(&sparkle_heart);
+    ///
+    /// assert_eq!("💖", sparkle_heart);
      /// ```
+    ///
+    /// Incorrect bytes:
+    ///
+    /// ```
+    /// // some invalid bytes
      /// let input = b"Hello \xF0\x90\x80World";
      /// let output = String::from_utf8_lossy(input);
-    /// assert_eq!(output, "Hello \u{FFFD}World");
+    ///
+    /// assert_eq!("Hello �World", output);
      /// ```
      #[stable(feature = "rust1", since = "1.0.0")]
      pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> Cow<'a, str> {
@@ -309,9 +378,33 @@ pub unsafe fn from_raw_parts(buf: *mut u8, length: usize, capacity: usize) -> St
          }
      }
  
-    /// Converts a vector of bytes to a new `String` without checking if
-    /// it contains valid UTF-8. This is unsafe because it assumes that
-    /// the UTF-8-ness of the vector has already been validated.
+    /// Converts a vector of bytes to a `String` without checking that the
+    /// string contains valid UTF-8.
+    ///
+    /// See the safe version, [`from_utf8()`][fromutf8], for more.
+    ///
+    /// [fromutf8]: struct.String.html#method.from_utf8
+    ///
+    /// # Unsafety
+    ///
+    /// This function is unsafe because it does not check that the bytes passed to
+    /// it are valid UTF-8. If this constraint is violated, undefined behavior
+    /// results, as the rest of Rust assumes that `String`s are valid UTF-8.
+    ///
+    /// # Examples
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// // some bytes, in a vector
+    /// let sparkle_heart = vec![240, 159, 146, 150];
+    ///
+    /// let sparkle_heart = unsafe {
+    ///     String::from_utf8_unchecked(sparkle_heart)
+    /// };
+    ///
+    /// assert_eq!("💖", sparkle_heart);
+    /// ```
      #[inline]
      #[stable(feature = "rust1", since = "1.0.0")]
      pub unsafe fn from_utf8_unchecked(bytes: Vec<u8>) -> String {
diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs

index be2186945d563b2a31d1babdbac9628e925761ce..9f1439ea3880767d7362a70dffac610db1eae723 100644 (file)
--- a/src/libcore/str/mod.rs
+++ b/src/libcore/str/mod.rs
@@ -119,7 +119,11 @@ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
  Section: Creating a string
  */
  
-/// Errors which can occur when attempting to interpret a byte slice as a `str`.
+/// Errors which can occur when attempting to interpret a sequence of `u8`
+/// as a string.
+///
+/// As such, the `from_utf8` family of functions and methods for both `String`s
+/// and `&str`s make use of this error, for example.
  #[derive(Copy, Eq, PartialEq, Clone, Debug)]
  #[stable(feature = "rust1", since = "1.0.0")]
  pub struct Utf8Error {
@@ -132,21 +136,104 @@ impl Utf8Error {
      ///
      /// It is the maximum index such that `from_utf8(input[..index])`
      /// would return `Some(_)`.
+    ///
+    /// # Examples
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// #![feature(utf8_error)]
+    ///
+    /// use std::str;
+    ///
+    /// // some invalid bytes, in a vector
+    /// let sparkle_heart = vec![0, 159, 146, 150];
+    ///
+    /// // std::str::from_utf8 returns a Utf8Error
+    /// let error = str::from_utf8(&sparkle_heart).unwrap_err();
+    ///
+    /// // the second byte is invalid here
+    /// assert_eq!(1, error.valid_up_to());
+    /// ```
      #[unstable(feature = "utf8_error", reason = "method just added",
                 issue = "27734")]
      pub fn valid_up_to(&self) -> usize { self.valid_up_to }
  }
  
-/// Converts a slice of bytes to a string slice without performing any
-/// allocations.
+/// Converts a slice of bytes to a string slice.
  ///
-/// Once the slice has been validated as UTF-8, it is transmuted in-place and
-/// returned as a '&str' instead of a '&[u8]'
+/// A string slice (`&str`) is made of bytes (`u8`), and a byte slice (`&[u8]`)
+/// is made of bytes, so this function converts between the two. Not all byte
+/// slices are valid string slices, however: `&str` requires that it is valid
+/// UTF-8. `from_utf8()` checks to ensure that the bytes are valid UTF-8, and
+/// then does the conversion.
+///
+/// If you are sure that the byte slice is valid UTF-8, and you don't want to
+/// incur the overhead of the validity check, there is an unsafe version of
+/// this function, [`from_utf8_unchecked()`][fromutf8], which has the same
+/// behavior but skips the check.
+///
+/// [fromutf8]: fn.from_utf8_unchecked.html
+///
+/// If you need a `String` instead of a `&str`, consider
+/// [`String::from_utf8()`][string].
+///
+/// [string]: ../string/struct.String.html#method.from_utf8
+///
+/// Because you can stack-allocate a `[u8; N]`, and you can take a `&[u8]` of
+/// it, this function is one way to have a stack-allocated string. There is
+/// an example of this in the examples section below.
  ///
  /// # Failure
  ///
  /// Returns `Err` if the slice is not UTF-8 with a description as to why the
  /// provided slice is not UTF-8.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```
+/// use std::str;
+///
+/// // some bytes, in a vector
+/// let sparkle_heart = vec![240, 159, 146, 150];
+///
+/// // We know these bytes are valid, so just use `unwrap()`.
+/// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
+///
+/// assert_eq!("💖", sparkle_heart);
+/// ```
+///
+/// Incorrect bytes:
+///
+/// ```
+/// use std::str;
+///
+/// // some invalid bytes, in a vector
+/// let sparkle_heart = vec![0, 159, 146, 150];
+///
+/// assert!(str::from_utf8(&sparkle_heart).is_err());
+/// ```
+///
+/// See the docs for [`Utf8Error`][error] for more details on the kinds of
+/// errors that can be returned.
+///
+/// [error]: struct.Utf8Error.html
+///
+/// A "stack allocated string":
+///
+/// ```
+/// use std::str;
+///
+/// // some bytes, in a stack-allocated array
+/// let sparkle_heart = [240, 159, 146, 150];
+///
+/// // We know these bytes are valid, so just use `unwrap()`.
+/// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
+///
+/// assert_eq!("💖", sparkle_heart);
+/// ```
  #[stable(feature = "rust1", since = "1.0.0")]
  pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
      try!(run_utf8_validation_iterator(&mut v.iter()));
@@ -155,6 +242,33 @@ pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
  
  /// Converts a slice of bytes to a string slice without checking
  /// that the string contains valid UTF-8.
+///
+/// See the safe version, [`from_utf8()`][fromutf8], for more.
+///
+/// [fromutf8]: fn.from_utf8.html
+///
+/// # Unsafety
+///
+/// This function is unsafe because it does not check that the bytes passed to
+/// it are valid UTF-8. If this constraint is violated, undefined behavior
+/// results, as the rest of Rust assumes that `&str`s are valid UTF-8.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```
+/// use std::str;
+///
+/// // some bytes, in a vector
+/// let sparkle_heart = vec![240, 159, 146, 150];
+///
+/// let sparkle_heart = unsafe {
+///     str::from_utf8_unchecked(&sparkle_heart)
+/// };
+///
+/// assert_eq!("💖", sparkle_heart);
+/// ```
  #[inline(always)]
  #[stable(feature = "rust1", since = "1.0.0")]
  pub unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
author	Steve Klabnik <steve@steveklabnik.com>
	Fri, 2 Oct 2015 18:36:02 +0000 (14:36 -0400)
committer	Steve Klabnik <steve@steveklabnik.com>
	Fri, 2 Oct 2015 23:42:25 +0000 (19:42 -0400)
src/libcollections/string.rs		patch \| blob \| history
src/libcore/str/mod.rs		patch \| blob \| history