library/alloc/src/str.rs

   1 //! Utilities for the `str` primitive type.
   2 //!
   3 //! *[See also the `str` primitive type](str).*
   4
   5 #![stable(feature = "rust1", since = "1.0.0")]
   6 // Many of the usings in this module are only used in the test configuration.
   7 // It's cleaner to just turn off the unused_imports warning than to fix them.
   8 #![allow(unused_imports)]
   9
  10 use core::borrow::{Borrow, BorrowMut};
  11 use core::iter::FusedIterator;
  12 use core::mem;
  13 use core::ptr;
  14 use core::str::pattern::{DoubleEndedSearcher, Pattern, ReverseSearcher, Searcher};
  15 use core::unicode::conversions;
  16
  17 use crate::borrow::ToOwned;
  18 use crate::boxed::Box;
  19 use crate::slice::{Concat, Join, SliceIndex};
  20 use crate::string::String;
  21 use crate::vec::Vec;
  22
  23 #[stable(feature = "rust1", since = "1.0.0")]
  24 pub use core::str::pattern;
  25 #[stable(feature = "encode_utf16", since = "1.8.0")]
  26 pub use core::str::EncodeUtf16;
  27 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
  28 pub use core::str::SplitAsciiWhitespace;
  29 #[stable(feature = "split_inclusive", since = "1.51.0")]
  30 pub use core::str::SplitInclusive;
  31 #[stable(feature = "rust1", since = "1.0.0")]
  32 pub use core::str::SplitWhitespace;
  33 #[stable(feature = "rust1", since = "1.0.0")]
  34 pub use core::str::{from_utf8, from_utf8_mut, Bytes, CharIndices, Chars};
  35 #[stable(feature = "rust1", since = "1.0.0")]
  36 pub use core::str::{from_utf8_unchecked, from_utf8_unchecked_mut, ParseBoolError};
  37 #[stable(feature = "str_escape", since = "1.34.0")]
  38 pub use core::str::{EscapeDebug, EscapeDefault, EscapeUnicode};
  39 #[stable(feature = "rust1", since = "1.0.0")]
  40 pub use core::str::{FromStr, Utf8Error};
  41 #[allow(deprecated)]
  42 #[stable(feature = "rust1", since = "1.0.0")]
  43 pub use core::str::{Lines, LinesAny};
  44 #[stable(feature = "rust1", since = "1.0.0")]
  45 pub use core::str::{MatchIndices, RMatchIndices};
  46 #[stable(feature = "rust1", since = "1.0.0")]
  47 pub use core::str::{Matches, RMatches};
  48 #[stable(feature = "rust1", since = "1.0.0")]
  49 pub use core::str::{RSplit, Split};
  50 #[stable(feature = "rust1", since = "1.0.0")]
  51 pub use core::str::{RSplitN, SplitN};
  52 #[stable(feature = "rust1", since = "1.0.0")]
  53 pub use core::str::{RSplitTerminator, SplitTerminator};
  54 #[unstable(feature = "utf8_chunks", issue = "99543")]
  55 pub use core::str::{Utf8Chunk, Utf8Chunks};
  56
  57 /// Note: `str` in `Concat<str>` is not meaningful here.
  58 /// This type parameter of the trait only exists to enable another impl.
  59 #[cfg(not(no_global_oom_handling))]
  60 #[unstable(feature = "slice_concat_ext", issue = "27747")]
  61 impl<S: Borrow<str>> Concat<str> for [S] {
  62     type Output = String;
  63
  64     fn concat(slice: &Self) -> String {
  65         Join::join(slice, "")
  66     }
  67 }
  68
  69 #[cfg(not(no_global_oom_handling))]
  70 #[unstable(feature = "slice_concat_ext", issue = "27747")]
  71 impl<S: Borrow<str>> Join<&str> for [S] {
  72     type Output = String;
  73
  74     fn join(slice: &Self, sep: &str) -> String {
  75         unsafe { String::from_utf8_unchecked(join_generic_copy(slice, sep.as_bytes())) }
  76     }
  77 }
  78
  79 #[cfg(not(no_global_oom_handling))]
  80 macro_rules! specialize_for_lengths {
  81     ($separator:expr, $target:expr, $iter:expr; $($num:expr),*) => {{
  82         let mut target = $target;
  83         let iter = $iter;
  84         let sep_bytes = $separator;
  85         match $separator.len() {
  86             $(
  87                 // loops with hardcoded sizes run much faster
  88                 // specialize the cases with small separator lengths
  89                 $num => {
  90                     for s in iter {
  91                         copy_slice_and_advance!(target, sep_bytes);
  92                         let content_bytes = s.borrow().as_ref();
  93                         copy_slice_and_advance!(target, content_bytes);
  94                     }
  95                 },
  96             )*
  97             _ => {
  98                 // arbitrary non-zero size fallback
  99                 for s in iter {
 100                     copy_slice_and_advance!(target, sep_bytes);
 101                     let content_bytes = s.borrow().as_ref();
 102                     copy_slice_and_advance!(target, content_bytes);
 103                 }
 104             }
 105         }
 106         target
 107     }}
 108 }
 109
 110 #[cfg(not(no_global_oom_handling))]
 111 macro_rules! copy_slice_and_advance {
 112     ($target:expr, $bytes:expr) => {
 113         let len = $bytes.len();
 114         let (head, tail) = { $target }.split_at_mut(len);
 115         head.copy_from_slice($bytes);
 116         $target = tail;
 117     };
 118 }
 119
 120 // Optimized join implementation that works for both Vec<T> (T: Copy) and String's inner vec
 121 // Currently (2018-05-13) there is a bug with type inference and specialization (see issue #36262)
 122 // For this reason SliceConcat<T> is not specialized for T: Copy and SliceConcat<str> is the
 123 // only user of this function. It is left in place for the time when that is fixed.
 124 //
 125 // the bounds for String-join are S: Borrow<str> and for Vec-join Borrow<[T]>
 126 // [T] and str both impl AsRef<[T]> for some T
 127 // => s.borrow().as_ref() and we always have slices
 128 #[cfg(not(no_global_oom_handling))]
 129 fn join_generic_copy<B, T, S>(slice: &[S], sep: &[T]) -> Vec<T>
 130 where
 131     T: Copy,
 132     B: AsRef<[T]> + ?Sized,
 133     S: Borrow<B>,
 134 {
 135     let sep_len = sep.len();
 136     let mut iter = slice.iter();
 137
 138     // the first slice is the only one without a separator preceding it
 139     let first = match iter.next() {
 140         Some(first) => first,
 141         None => return vec![],
 142     };
 143
 144     // compute the exact total length of the joined Vec
 145     // if the `len` calculation overflows, we'll panic
 146     // we would have run out of memory anyway and the rest of the function requires
 147     // the entire Vec pre-allocated for safety
 148     let reserved_len = sep_len
 149         .checked_mul(iter.len())
 150         .and_then(|n| {
 151             slice.iter().map(|s| s.borrow().as_ref().len()).try_fold(n, usize::checked_add)
 152         })
 153         .expect("attempt to join into collection with len > usize::MAX");
 154
 155     // prepare an uninitialized buffer
 156     let mut result = Vec::with_capacity(reserved_len);
 157     debug_assert!(result.capacity() >= reserved_len);
 158
 159     result.extend_from_slice(first.borrow().as_ref());
 160
 161     unsafe {
 162         let pos = result.len();
 163         let target = result.spare_capacity_mut().get_unchecked_mut(..reserved_len - pos);
 164
 165         // Convert the separator and slices to slices of MaybeUninit
 166         // to simplify implementation in specialize_for_lengths
 167         let sep_uninit = core::slice::from_raw_parts(sep.as_ptr().cast(), sep.len());
 168         let iter_uninit = iter.map(|it| {
 169             let it = it.borrow().as_ref();
 170             core::slice::from_raw_parts(it.as_ptr().cast(), it.len())
 171         });
 172
 173         // copy separator and slices over without bounds checks
 174         // generate loops with hardcoded offsets for small separators
 175         // massive improvements possible (~ x2)
 176         let remain = specialize_for_lengths!(sep_uninit, target, iter_uninit; 0, 1, 2, 3, 4);
 177
 178         // A weird borrow implementation may return different
 179         // slices for the length calculation and the actual copy.
 180         // Make sure we don't expose uninitialized bytes to the caller.
 181         let result_len = reserved_len - remain.len();
 182         result.set_len(result_len);
 183     }
 184     result
 185 }
 186
 187 #[stable(feature = "rust1", since = "1.0.0")]
 188 impl Borrow<str> for String {
 189     #[inline]
 190     fn borrow(&self) -> &str {
 191         &self[..]
 192     }
 193 }
 194
 195 #[stable(feature = "string_borrow_mut", since = "1.36.0")]
 196 impl BorrowMut<str> for String {
 197     #[inline]
 198     fn borrow_mut(&mut self) -> &mut str {
 199         &mut self[..]
 200     }
 201 }
 202
 203 #[cfg(not(no_global_oom_handling))]
 204 #[stable(feature = "rust1", since = "1.0.0")]
 205 impl ToOwned for str {
 206     type Owned = String;
 207     #[inline]
 208     fn to_owned(&self) -> String {
 209         unsafe { String::from_utf8_unchecked(self.as_bytes().to_owned()) }
 210     }
 211
 212     fn clone_into(&self, target: &mut String) {
 213         let mut b = mem::take(target).into_bytes();
 214         self.as_bytes().clone_into(&mut b);
 215         *target = unsafe { String::from_utf8_unchecked(b) }
 216     }
 217 }
 218
 219 /// Methods for string slices.
 220 #[cfg(not(test))]
 221 impl str {
 222     /// Converts a `Box<str>` into a `Box<[u8]>` without copying or allocating.
 223     ///
 224     /// # Examples
 225     ///
 226     /// Basic usage:
 227     ///
 228     /// ```
 229     /// let s = "this is a string";
 230     /// let boxed_str = s.to_owned().into_boxed_str();
 231     /// let boxed_bytes = boxed_str.into_boxed_bytes();
 232     /// assert_eq!(*boxed_bytes, *s.as_bytes());
 233     /// ```
 234     #[rustc_allow_incoherent_impl]
 235     #[stable(feature = "str_box_extras", since = "1.20.0")]
 236     #[must_use = "`self` will be dropped if the result is not used"]
 237     #[inline]
 238     pub fn into_boxed_bytes(self: Box<str>) -> Box<[u8]> {
 239         self.into()
 240     }
 241
 242     /// Replaces all matches of a pattern with another string.
 243     ///
 244     /// `replace` creates a new [`String`], and copies the data from this string slice into it.
 245     /// While doing so, it attempts to find matches of a pattern. If it finds any, it
 246     /// replaces them with the replacement string slice.
 247     ///
 248     /// # Examples
 249     ///
 250     /// Basic usage:
 251     ///
 252     /// ```
 253     /// let s = "this is old";
 254     ///
 255     /// assert_eq!("this is new", s.replace("old", "new"));
 256     /// assert_eq!("than an old", s.replace("is", "an"));
 257     /// ```
 258     ///
 259     /// When the pattern doesn't match:
 260     ///
 261     /// ```
 262     /// let s = "this is old";
 263     /// assert_eq!(s, s.replace("cookie monster", "little lamb"));
 264     /// ```
 265     #[cfg(not(no_global_oom_handling))]
 266     #[rustc_allow_incoherent_impl]
 267     #[must_use = "this returns the replaced string as a new allocation, \
 268                   without modifying the original"]
 269     #[stable(feature = "rust1", since = "1.0.0")]
 270     #[inline]
 271     pub fn replace<'a, P: Pattern<'a>>(&'a self, from: P, to: &str) -> String {
 272         let mut result = String::new();
 273         let mut last_end = 0;
 274         for (start, part) in self.match_indices(from) {
 275             result.push_str(unsafe { self.get_unchecked(last_end..start) });
 276             result.push_str(to);
 277             last_end = start + part.len();
 278         }
 279         result.push_str(unsafe { self.get_unchecked(last_end..self.len()) });
 280         result
 281     }
 282
 283     /// Replaces first N matches of a pattern with another string.
 284     ///
 285     /// `replacen` creates a new [`String`], and copies the data from this string slice into it.
 286     /// While doing so, it attempts to find matches of a pattern. If it finds any, it
 287     /// replaces them with the replacement string slice at most `count` times.
 288     ///
 289     /// # Examples
 290     ///
 291     /// Basic usage:
 292     ///
 293     /// ```
 294     /// let s = "foo foo 123 foo";
 295     /// assert_eq!("new new 123 foo", s.replacen("foo", "new", 2));
 296     /// assert_eq!("faa fao 123 foo", s.replacen('o', "a", 3));
 297     /// assert_eq!("foo foo new23 foo", s.replacen(char::is_numeric, "new", 1));
 298     /// ```
 299     ///
 300     /// When the pattern doesn't match:
 301     ///
 302     /// ```
 303     /// let s = "this is old";
 304     /// assert_eq!(s, s.replacen("cookie monster", "little lamb", 10));
 305     /// ```
 306     #[cfg(not(no_global_oom_handling))]
 307     #[rustc_allow_incoherent_impl]
 308     #[must_use = "this returns the replaced string as a new allocation, \
 309                   without modifying the original"]
 310     #[stable(feature = "str_replacen", since = "1.16.0")]
 311     pub fn replacen<'a, P: Pattern<'a>>(&'a self, pat: P, to: &str, count: usize) -> String {
 312         // Hope to reduce the times of re-allocation
 313         let mut result = String::with_capacity(32);
 314         let mut last_end = 0;
 315         for (start, part) in self.match_indices(pat).take(count) {
 316             result.push_str(unsafe { self.get_unchecked(last_end..start) });
 317             result.push_str(to);
 318             last_end = start + part.len();
 319         }
 320         result.push_str(unsafe { self.get_unchecked(last_end..self.len()) });
 321         result
 322     }
 323
 324     /// Returns the lowercase equivalent of this string slice, as a new [`String`].
 325     ///
 326     /// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property
 327     /// `Lowercase`.
 328     ///
 329     /// Since some characters can expand into multiple characters when changing
 330     /// the case, this function returns a [`String`] instead of modifying the
 331     /// parameter in-place.
 332     ///
 333     /// # Examples
 334     ///
 335     /// Basic usage:
 336     ///
 337     /// ```
 338     /// let s = "HELLO";
 339     ///
 340     /// assert_eq!("hello", s.to_lowercase());
 341     /// ```
 342     ///
 343     /// A tricky example, with sigma:
 344     ///
 345     /// ```
 346     /// let sigma = "Σ";
 347     ///
 348     /// assert_eq!("σ", sigma.to_lowercase());
 349     ///
 350     /// // but at the end of a word, it's ς, not σ:
 351     /// let odysseus = "ὈΔΥΣΣΕΎΣ";
 352     ///
 353     /// assert_eq!("ὀδυσσεύς", odysseus.to_lowercase());
 354     /// ```
 355     ///
 356     /// Languages without case are not changed:
 357     ///
 358     /// ```
 359     /// let new_year = "农历新年";
 360     ///
 361     /// assert_eq!(new_year, new_year.to_lowercase());
 362     /// ```
 363     #[cfg(not(no_global_oom_handling))]
 364     #[rustc_allow_incoherent_impl]
 365     #[must_use = "this returns the lowercase string as a new String, \
 366                   without modifying the original"]
 367     #[stable(feature = "unicode_case_mapping", since = "1.2.0")]
 368     pub fn to_lowercase(&self) -> String {
 369         let out = convert_while_ascii(self.as_bytes(), u8::to_ascii_lowercase);
 370
 371         // Safety: we know this is a valid char boundary since
 372         // out.len() is only progressed if ascii bytes are found
 373         let rest = unsafe { self.get_unchecked(out.len()..) };
 374
 375         // Safety: We have written only valid ASCII to our vec
 376         let mut s = unsafe { String::from_utf8_unchecked(out) };
 377
 378         for (i, c) in rest[..].char_indices() {
 379             if c == 'Σ' {
 380                 // Σ maps to σ, except at the end of a word where it maps to ς.
 381                 // This is the only conditional (contextual) but language-independent mapping
 382                 // in `SpecialCasing.txt`,
 383                 // so hard-code it rather than have a generic "condition" mechanism.
 384                 // See https://github.com/rust-lang/rust/issues/26035
 385                 map_uppercase_sigma(rest, i, &mut s)
 386             } else {
 387                 match conversions::to_lower(c) {
 388                     [a, '\0', _] => s.push(a),
 389                     [a, b, '\0'] => {
 390                         s.push(a);
 391                         s.push(b);
 392                     }
 393                     [a, b, c] => {
 394                         s.push(a);
 395                         s.push(b);
 396                         s.push(c);
 397                     }
 398                 }
 399             }
 400         }
 401         return s;
 402
 403         fn map_uppercase_sigma(from: &str, i: usize, to: &mut String) {
 404             // See https://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
 405             // for the definition of `Final_Sigma`.
 406             debug_assert!('Σ'.len_utf8() == 2);
 407             let is_word_final = case_ignoreable_then_cased(from[..i].chars().rev())
 408                 && !case_ignoreable_then_cased(from[i + 2..].chars());
 409             to.push_str(if is_word_final { "ς" } else { "σ" });
 410         }
 411
 412         fn case_ignoreable_then_cased<I: Iterator<Item = char>>(iter: I) -> bool {
 413             use core::unicode::{Case_Ignorable, Cased};
 414             match iter.skip_while(|&c| Case_Ignorable(c)).next() {
 415                 Some(c) => Cased(c),
 416                 None => false,
 417             }
 418         }
 419     }
 420
 421     /// Returns the uppercase equivalent of this string slice, as a new [`String`].
 422     ///
 423     /// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property
 424     /// `Uppercase`.
 425     ///
 426     /// Since some characters can expand into multiple characters when changing
 427     /// the case, this function returns a [`String`] instead of modifying the
 428     /// parameter in-place.
 429     ///
 430     /// # Examples
 431     ///
 432     /// Basic usage:
 433     ///
 434     /// ```
 435     /// let s = "hello";
 436     ///
 437     /// assert_eq!("HELLO", s.to_uppercase());
 438     /// ```
 439     ///
 440     /// Scripts without case are not changed:
 441     ///
 442     /// ```
 443     /// let new_year = "农历新年";
 444     ///
 445     /// assert_eq!(new_year, new_year.to_uppercase());
 446     /// ```
 447     ///
 448     /// One character can become multiple:
 449     /// ```
 450     /// let s = "tschüß";
 451     ///
 452     /// assert_eq!("TSCHÜSS", s.to_uppercase());
 453     /// ```
 454     #[cfg(not(no_global_oom_handling))]
 455     #[rustc_allow_incoherent_impl]
 456     #[must_use = "this returns the uppercase string as a new String, \
 457                   without modifying the original"]
 458     #[stable(feature = "unicode_case_mapping", since = "1.2.0")]
 459     pub fn to_uppercase(&self) -> String {
 460         let out = convert_while_ascii(self.as_bytes(), u8::to_ascii_uppercase);
 461
 462         // Safety: we know this is a valid char boundary since
 463         // out.len() is only progressed if ascii bytes are found
 464         let rest = unsafe { self.get_unchecked(out.len()..) };
 465
 466         // Safety: We have written only valid ASCII to our vec
 467         let mut s = unsafe { String::from_utf8_unchecked(out) };
 468
 469         for c in rest.chars() {
 470             match conversions::to_upper(c) {
 471                 [a, '\0', _] => s.push(a),
 472                 [a, b, '\0'] => {
 473                     s.push(a);
 474                     s.push(b);
 475                 }
 476                 [a, b, c] => {
 477                     s.push(a);
 478                     s.push(b);
 479                     s.push(c);
 480                 }
 481             }
 482         }
 483         s
 484     }
 485
 486     /// Converts a [`Box<str>`] into a [`String`] without copying or allocating.
 487     ///
 488     /// # Examples
 489     ///
 490     /// Basic usage:
 491     ///
 492     /// ```
 493     /// let string = String::from("birthday gift");
 494     /// let boxed_str = string.clone().into_boxed_str();
 495     ///
 496     /// assert_eq!(boxed_str.into_string(), string);
 497     /// ```
 498     #[stable(feature = "box_str", since = "1.4.0")]
 499     #[rustc_allow_incoherent_impl]
 500     #[must_use = "`self` will be dropped if the result is not used"]
 501     #[inline]
 502     pub fn into_string(self: Box<str>) -> String {
 503         let slice = Box::<[u8]>::from(self);
 504         unsafe { String::from_utf8_unchecked(slice.into_vec()) }
 505     }
 506
 507     /// Creates a new [`String`] by repeating a string `n` times.
 508     ///
 509     /// # Panics
 510     ///
 511     /// This function will panic if the capacity would overflow.
 512     ///
 513     /// # Examples
 514     ///
 515     /// Basic usage:
 516     ///
 517     /// ```
 518     /// assert_eq!("abc".repeat(4), String::from("abcabcabcabc"));
 519     /// ```
 520     ///
 521     /// A panic upon overflow:
 522     ///
 523     /// ```should_panic
 524     /// // this will panic at runtime
 525     /// let huge = "0123456789abcdef".repeat(usize::MAX);
 526     /// ```
 527     #[cfg(not(no_global_oom_handling))]
 528     #[rustc_allow_incoherent_impl]
 529     #[must_use]
 530     #[stable(feature = "repeat_str", since = "1.16.0")]
 531     pub fn repeat(&self, n: usize) -> String {
 532         unsafe { String::from_utf8_unchecked(self.as_bytes().repeat(n)) }
 533     }
 534
 535     /// Returns a copy of this string where each character is mapped to its
 536     /// ASCII upper case equivalent.
 537     ///
 538     /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
 539     /// but non-ASCII letters are unchanged.
 540     ///
 541     /// To uppercase the value in-place, use [`make_ascii_uppercase`].
 542     ///
 543     /// To uppercase ASCII characters in addition to non-ASCII characters, use
 544     /// [`to_uppercase`].
 545     ///
 546     /// # Examples
 547     ///
 548     /// ```
 549     /// let s = "Grüße, Jürgen ❤";
 550     ///
 551     /// assert_eq!("GRüßE, JüRGEN ❤", s.to_ascii_uppercase());
 552     /// ```
 553     ///
 554     /// [`make_ascii_uppercase`]: str::make_ascii_uppercase
 555     /// [`to_uppercase`]: #method.to_uppercase
 556     #[cfg(not(no_global_oom_handling))]
 557     #[rustc_allow_incoherent_impl]
 558     #[must_use = "to uppercase the value in-place, use `make_ascii_uppercase()`"]
 559     #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
 560     #[inline]
 561     pub fn to_ascii_uppercase(&self) -> String {
 562         let mut bytes = self.as_bytes().to_vec();
 563         bytes.make_ascii_uppercase();
 564         // make_ascii_uppercase() preserves the UTF-8 invariant.
 565         unsafe { String::from_utf8_unchecked(bytes) }
 566     }
 567
 568     /// Returns a copy of this string where each character is mapped to its
 569     /// ASCII lower case equivalent.
 570     ///
 571     /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
 572     /// but non-ASCII letters are unchanged.
 573     ///
 574     /// To lowercase the value in-place, use [`make_ascii_lowercase`].
 575     ///
 576     /// To lowercase ASCII characters in addition to non-ASCII characters, use
 577     /// [`to_lowercase`].
 578     ///
 579     /// # Examples
 580     ///
 581     /// ```
 582     /// let s = "Grüße, Jürgen ❤";
 583     ///
 584     /// assert_eq!("grüße, jürgen ❤", s.to_ascii_lowercase());
 585     /// ```
 586     ///
 587     /// [`make_ascii_lowercase`]: str::make_ascii_lowercase
 588     /// [`to_lowercase`]: #method.to_lowercase
 589     #[cfg(not(no_global_oom_handling))]
 590     #[rustc_allow_incoherent_impl]
 591     #[must_use = "to lowercase the value in-place, use `make_ascii_lowercase()`"]
 592     #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
 593     #[inline]
 594     pub fn to_ascii_lowercase(&self) -> String {
 595         let mut bytes = self.as_bytes().to_vec();
 596         bytes.make_ascii_lowercase();
 597         // make_ascii_lowercase() preserves the UTF-8 invariant.
 598         unsafe { String::from_utf8_unchecked(bytes) }
 599     }
 600 }
 601
 602 /// Converts a boxed slice of bytes to a boxed string slice without checking
 603 /// that the string contains valid UTF-8.
 604 ///
 605 /// # Examples
 606 ///
 607 /// Basic usage:
 608 ///
 609 /// ```
 610 /// let smile_utf8 = Box::new([226, 152, 186]);
 611 /// let smile = unsafe { std::str::from_boxed_utf8_unchecked(smile_utf8) };
 612 ///
 613 /// assert_eq!("☺", &*smile);
 614 /// ```
 615 #[stable(feature = "str_box_extras", since = "1.20.0")]
 616 #[must_use]
 617 #[inline]
 618 pub unsafe fn from_boxed_utf8_unchecked(v: Box<[u8]>) -> Box<str> {
 619     unsafe { Box::from_raw(Box::into_raw(v) as *mut str) }
 620 }
 621
 622 /// Converts the bytes while the bytes are still ascii.
 623 /// For better average performance, this is happens in chunks of `2*size_of::<usize>()`.
 624 /// Returns a vec with the converted bytes.
 625 #[inline]
 626 #[cfg(not(test))]
 627 #[cfg(not(no_global_oom_handling))]
 628 fn convert_while_ascii(b: &[u8], convert: fn(&u8) -> u8) -> Vec<u8> {
 629     let mut out = Vec::with_capacity(b.len());
 630
 631     const USIZE_SIZE: usize = mem::size_of::<usize>();
 632     const MAGIC_UNROLL: usize = 2;
 633     const N: usize = USIZE_SIZE * MAGIC_UNROLL;
 634     const NONASCII_MASK: usize = usize::from_ne_bytes([0x80; USIZE_SIZE]);
 635
 636     let mut i = 0;
 637     unsafe {
 638         while i + N <= b.len() {
 639             // Safety: we have checks the sizes `b` and `out` to know that our
 640             let in_chunk = b.get_unchecked(i..i + N);
 641             let out_chunk = out.spare_capacity_mut().get_unchecked_mut(i..i + N);
 642
 643             let mut bits = 0;
 644             for j in 0..MAGIC_UNROLL {
 645                 // read the bytes 1 usize at a time (unaligned since we haven't checked the alignment)
 646                 // safety: in_chunk is valid bytes in the range
 647                 bits |= in_chunk.as_ptr().cast::<usize>().add(j).read_unaligned();
 648             }
 649             // if our chunks aren't ascii, then return only the prior bytes as init
 650             if bits & NONASCII_MASK != 0 {
 651                 break;
 652             }
 653
 654             // perform the case conversions on N bytes (gets heavily autovec'd)
 655             for j in 0..N {
 656                 // safety: in_chunk and out_chunk is valid bytes in the range
 657                 let out = out_chunk.get_unchecked_mut(j);
 658                 out.write(convert(in_chunk.get_unchecked(j)));
 659             }
 660
 661             // mark these bytes as initialised
 662             i += N;
 663         }
 664         out.set_len(i);
 665     }
 666
 667     out
 668 }